twin_h2_c2_comparison_diff_twin_match <- readRDS(file=‘~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/revised_analysis/MaTCHComparisonAnalysis/twin_h2_c2_comparison_diff_twin_match.rds’)— title: “Aetna Twins” author: “Chirag Lakhani” date: “8/9/2018” output: html_document —

Functions for Analysis

Generate Dataframes

Read Data and setup allphewas_both

Create allphewas_both_zip

Create allphewas_both_zip_fixed_effect

allphewas_binary_zipcode_fixed_effect <- read_delim("~/Dropbox (HMS)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/manuscript_data/twin_binary_zipcode_fixed_effect.csv", 
    " ", escape_double = FALSE, col_names = FALSE, 
    trim_ws = TRUE)
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   X1 = col_character(),
##   X2 = col_integer(),
##   X3 = col_integer(),
##   X4 = col_integer(),
##   X5 = col_integer(),
##   X6 = col_integer(),
##   X7 = col_integer(),
##   X8 = col_integer(),
##   X9 = col_integer(),
##   X61 = col_integer(),
##   X62 = col_integer(),
##   X63 = col_integer(),
##   X64 = col_integer(),
##   X65 = col_integer()
## )
## See spec(...) for full column specifications.
names(allphewas_binary_zipcode_fixed_effect) <- readRDS('~/Dropbox (HMS)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/manuscript_data/names_binary_zipcode_fixed_effect.rds')


allphewas_binary_zipcode_fixed_effect <- allphewas_binary_zipcode_fixed_effect %>%
  mutate(h2_liab_SE_se = ifelse(phewas_code == '984',h2_SE,h2_liab_SE_se)) %>%
  mutate(rliabss_SE_se = ifelse(phewas_code == '984',r_SS_SE,rliabss_SE_se)) %>%
  mutate(c2_liab_SE_se = ifelse(phewas_code == '984',c2_SE,c2_liab_SE_se)) %>%
    mutate(rliabos_SE_se = ifelse(phewas_code == '984',r_OS_SE,rliabos_SE_se)) 






names(allphewas_binary_zipcode_fixed_effect) <- sub("_se", "", names(allphewas_binary_zipcode_fixed_effect))



allphewas_binary_zipcode_fixed_effect <- allphewas_binary_zipcode_fixed_effect %>% inner_join(phewas, ., by='phewas_code') 
## Warning: Column `phewas_code` joining factor and character vector, coercing
## into character vector
missingDFZipcode_fixed_effect <- allphewas_binary_zipcode_fixed_effect  %>% 
  right_join(.,allphewas_binary %>% select(phewas_code), by='phewas_code') %>% 
  filter(is.na(phewas_description)) %>% select(phewas_code)



saveRDS(missingDFZipcode_fixed_effect, file='~/Dropbox (HMS)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/orchestraAnalysis/missingDFZipcode_fixed_effect.rds')

FDR Correction

  allphewas_both <- allphewas_both %>%
  mutate(h2_liab.zscore=h2_liab/h2_liab_SE) %>%
                  mutate(c2_liab.zscore=c2_liab/c2_liab_SE) %>%
                  mutate(h2_liab.pvalue=2*pnorm(-abs(h2_liab.zscore))) %>%
                  mutate(c2_liab.pvalue=2*pnorm(-abs(c2_liab.zscore))) %>%
                  mutate(pvalue_h2_FDR = p.adjust(.$h2_liab.pvalue, method='BY')) %>%
                  mutate(pvalue_c2_FDR = p.adjust(.$c2_liab.pvalue, method='BY')) %>%
                    mutate(pvalue_h2_bonferroni = p.adjust(.$h2_liab.pvalue, method='bonferroni')) %>%
                  mutate(pvalue_c2_bonferroni = p.adjust(.$c2_liab.pvalue, method='bonferroni')) %>%
                  mutate(observed_h2 = -log10(h2_liab.pvalue) ) %>%
                  mutate(observed_c2 = -log10(c2_liab.pvalue) ) %>%
                  mutate(rliabss_liab.zscore=rliabss/rliabss_SE) %>%
                  mutate(rliabos_liab.zscore=rliabos/rliabos_SE) %>%
                  mutate(rliabss_liab.pvalue=2*pnorm(-abs(rliabss_liab.zscore))) %>%
                  mutate(rliabos_liab.pvalue=2*pnorm(-abs(rliabos_liab.zscore))) %>%
                  mutate(observed_rliabss = -log10(rliabss_liab.pvalue) ) %>%
                  mutate(observed_rliabos = -log10(rliabos_liab.pvalue) ) %>%
    mutate(tot_e2=h2_liab+c2_liab) %>%
  mutate(tot_e2_SE=sqrt(h2_liab_SE^2+c2_liab_SE^2)) %>%
  mutate(tot_e2.zscore=tot_e2/tot_e2_SE) %>%
  mutate(tot_e2.pvalue=2*pnorm(-abs(tot_e2.zscore))) %>%
  mutate(tot_e2.pvalue_FDR = p.adjust(tot_e2.pvalue, method='BY')) %>%
  mutate(observed_tot_e2 = -log10(tot_e2.pvalue) ) %>%
    mutate(e2=1-h2_liab-c2_liab) %>%
  mutate(e2_SE=sqrt(h2_liab_SE^2 + c2_liab_SE^2)) %>%
  mutate(e2.zscore=e2/e2_SE) %>%
  mutate(e2.pvalue=2*pnorm(-abs(e2.zscore))) %>%
  mutate(e2.pvalue_FDR = p.adjust(e2.pvalue, method='BY')) %>%
  mutate(observed_e2 = -log10(e2.pvalue) ) %>%
  mutate(beta_gender.zscore =`as.factor(Gender)M`/`as.factor(Gender)M_SE`) %>%
  mutate(beta_gender.pvalue=2*pnorm(-abs(beta_gender.zscore))) %>%
  mutate(pvalue_beta_gender_FDR = p.adjust(.$beta_gender.pvalue, method='BY')) %>%
  mutate(beta_age.zscore =MemberBirthYear/MemberBirthYear_SE) %>%
  mutate(beta_age.pvalue=2*pnorm(-abs(beta_age.zscore))) %>%
  mutate(pvalue_beta_age_FDR = p.adjust(.$beta_age.pvalue, method='BY')) 




  allphewas_both_zipcode <- allphewas_both_zipcode %>%
  mutate(h2_liab.zscore=h2_liab/h2_liab_SE) %>%
                  mutate(c2_liab.zscore=c2_liab/c2_liab_SE) %>%
                  mutate(h2_liab.pvalue=2*pnorm(-abs(h2_liab.zscore))) %>%
                  mutate(c2_liab.pvalue=2*pnorm(-abs(c2_liab.zscore))) %>%
                  mutate(pvalue_h2_FDR = p.adjust(.$h2_liab.pvalue, method='BY')) %>%
                  mutate(pvalue_c2_FDR = p.adjust(.$c2_liab.pvalue, method='BY')) %>%
                  mutate(observed_h2 = -log10(h2_liab.pvalue) ) %>%
                  mutate(observed_c2 = -log10(c2_liab.pvalue) ) %>%
                  mutate(rliabss_liab.zscore=rliabss/rliabss_SE) %>%
                  mutate(rliabos_liab.zscore=rliabos/rliabos_SE) %>%
                  mutate(rliabss_liab.pvalue=2*pnorm(-abs(rliabss_liab.zscore))) %>%
                  mutate(rliabos_liab.pvalue=2*pnorm(-abs(rliabos_liab.zscore))) %>%
                  mutate(observed_rliabss = -log10(rliabss_liab.pvalue) ) %>%
                  mutate(observed_rliabos = -log10(rliabos_liab.pvalue) ) %>%
    mutate(tot_e2=h2_liab+c2_liab) %>%
  mutate(tot_e2_SE=sqrt(h2_liab_SE^2+c2_liab_SE^2)) %>%
  mutate(tot_e2.zscore=tot_e2/tot_e2_SE) %>%
  mutate(tot_e2.pvalue=2*pnorm(-abs(tot_e2.zscore))) %>%
  mutate(tot_e2.pvalue_FDR = p.adjust(tot_e2.pvalue, method='BY')) %>%
  mutate(observed_tot_e2 = -log10(tot_e2.pvalue) ) %>%
    mutate(e2=1-h2_liab-c2_liab-rliabincome-rliabtemp-rliabaqi) %>%
  mutate(e2_SE=sqrt(h2_liab_SE^2+c2_liab_SE^2 + rliabincome_SE^2 + rliabaqi_SE^2 + rliabtemp_SE^2  )) %>%
  mutate(e2.zscore=e2/e2_SE) %>%
  mutate(e2.pvalue=2*pnorm(-abs(e2.zscore))) %>%
  mutate(e2.pvalue_FDR = p.adjust(e2.pvalue, method='BY')) %>%
  mutate(observed_e2 = -log10(e2.pvalue) ) %>%
                      mutate(rliabincome.zscore=rliabincome/rliabincome_SE) %>%
                  mutate(rliabincome.pvalue=2*pnorm(-abs(rliabincome.zscore))) %>%
                  mutate(rliabincome.pvalue_FDR = p.adjust(rliabincome.pvalue, method='BY')) %>%
                  mutate(observed_rliabincome = -log10(rliabincome.pvalue) ) %>%
mutate(rliabaqi.zscore=rliabaqi/rliabaqi_SE) %>%
  mutate(rliabaqi.pvalue=2*pnorm(-abs(rliabaqi.zscore))) %>%
  mutate(rliabaqi.pvalue_FDR = p.adjust(rliabaqi.pvalue, method='BY')) %>%
  mutate(observed_rliabaqi = -log10(rliabaqi.pvalue) ) %>%
mutate(rliabtemp.zscore=rliabtemp/rliabtemp_SE) %>%
  mutate(rliabtemp.pvalue=2*pnorm(-abs(rliabtemp.zscore))) %>%
  mutate(rliabtemp.pvalue_FDR = p.adjust(rliabtemp.pvalue, method='BY')) %>%
  mutate(observed_rliabtemp = -log10(rliabtemp.pvalue) ) 

  
  rm(cnt_df, cost_df, allphewas_binary, allphewas_binary_zipcode,allphewas_quant_pheno, allphewas_quant_zipcode, phewas)

Short Names

allphewas_both <- allphewas_both %>% mutate(shortName=phewas_description) %>%
    mutate(shortName=ifelse(phewas_code =='313.1','ADHD',shortName)) %>%
    mutate(shortName=ifelse(phewas_code =='134577','LDL Cholesterol',shortName)) %>%
    mutate(shortName=ifelse(phewas_code =='706.1','Acne',shortName)) %>%
    mutate(shortName=ifelse(phewas_code =='495','Asthma',shortName)) %>%
    mutate(shortName=ifelse(phewas_code =='637','Low Birthweight',shortName)) %>%
    mutate(shortName=ifelse(phewas_code =='464','Acute Sinusitis',shortName)) %>%
  mutate(shortName=ifelse(phewas_code =='984','Lead Poisoning',shortName)) %>%
  mutate(shortName=ifelse(phewas_code =='871.1','Hand Wound',shortName)) %>%
  mutate(shortName=ifelse(phewas_code =='871.3','Foot Wound',shortName)) %>%
  mutate(shortName=ifelse(phewas_code =='800.3','Tibia/Fibula Fracture',shortName)) %>%
  mutate(shortName=ifelse(phewas_code =='726.3','Bursitis',shortName)) %>%
  mutate(shortName=ifelse(phewas_code =='696.4','Psoriasis',shortName)) %>%
    mutate(shortName=ifelse(phewas_code =='696.4','Psoriasis',shortName)) %>%
    mutate(shortName=ifelse(phewas_code =='7187','Hemoglobin',shortName)) %>%
    mutate(shortName=ifelse(phewas_code =='362.1','Retinopathy',shortName)) %>%
    mutate(shortName = ifelse(phewas_code =='313','Pervasive Development Disorder',shortName)) %>%
  mutate(shortName = ifelse(phewas_code =='313.1','ADHD',shortName)) %>%
  mutate(shortName = ifelse(phewas_code =='COST','Cost',shortName)) %>%
  mutate(shortName = ifelse(phewas_code =='CNT','Comorbids',shortName)) %>%
  mutate(shortName = ifelse(phewas_code =='984','Lead Poisoning',shortName)) %>%
    mutate(shortName = ifelse(phewas_code =='367','Low Vision',shortName)) %>%
      mutate(shortName = ifelse(phewas_code =='769','Nonallopathic Lesion',shortName)) %>%
        mutate(shortName = ifelse(phewas_code =='465','Respiratory Infections',shortName)) %>%
      mutate(shortName = ifelse(phewas_code =='870','Head Wound',shortName)) %>%
      mutate(shortName = ifelse(phewas_code =='381','Otitis Media',shortName)) %>%
  mutate(shortName = ifelse(phewas_code =='474.2','Chronic Tonsillitis',shortName)) %>%
    mutate(shortName = ifelse(phewas_code =='524','Dentofacial Anomalies',shortName)) %>%
    mutate(shortName = ifelse(phewas_code =='930','Food Allergy',shortName)) %>%
    mutate(shortName = ifelse(phewas_code =='939','Atopic/Contact Dermatitis',shortName)) %>%
mutate(shortName = ifelse(phewas_code =='706','Sebaceous Gland',shortName)) %>%
  mutate(shortName = ifelse(phewas_code =='726','Peripheral Enthesopathies',shortName)) %>%
  mutate(shortName = ifelse(phewas_code =='859','Complication Due to Implant',shortName)) %>%
    mutate(shortName = ifelse(phewas_code =='343','Cerebral Palsy',shortName)) %>%
    mutate(shortName = ifelse(phewas_code =='316','Substance Addiction',shortName)) %>%
      mutate(shortName = ifelse(phewas_code =='555','Inflammatory Bowel Disease',shortName)) %>%
    mutate(shortName = ifelse(phewas_code =='345','Epilepsy, recurrent seizures',shortName)) %>%
      mutate(shortName = ifelse(phewas_code =='758','Chromosomal anomalies/genetic disorders',shortName)) %>%
mutate(shortName = ifelse(phewas_code =='747','Cardiac/circulatory congenital anomalies',shortName)) %>%
  mutate(shortName = ifelse(phewas_code =='656.2','Respiratory Condition Newborn',shortName)) %>%
    mutate(shortName = ifelse(phewas_code =='656','Perinatal Condition Newborn',shortName)) 

    
    
    
    
    
    
    
    
allphewas_both_zipcode <- allphewas_both_zipcode %>% mutate(shortName=phewas_description) %>%
    mutate(shortName=ifelse(phewas_code =='313.1','ADHD',shortName)) %>%
    mutate(shortName=ifelse(phewas_code =='134577','LDL Cholesterol',shortName)) %>%
    mutate(shortName=ifelse(phewas_code =='706.1','Acne',shortName)) %>%
    mutate(shortName=ifelse(phewas_code =='495','Asthma',shortName)) %>%
    mutate(shortName=ifelse(phewas_code =='637','Low Birthweight',shortName)) %>%
    mutate(shortName=ifelse(phewas_code =='464','Acute Sinusitis',shortName)) %>%
  mutate(shortName=ifelse(phewas_code =='984','Lead Poisoning',shortName)) %>%
  mutate(shortName=ifelse(phewas_code =='871.1','Hand Wound',shortName)) %>%
  mutate(shortName=ifelse(phewas_code =='871.3','Foot Wound',shortName)) %>%
  mutate(shortName=ifelse(phewas_code =='800.3','Tibia/Fibula Fracture',shortName)) %>%
  mutate(shortName=ifelse(phewas_code =='726.3','Bursitis',shortName)) %>%
  mutate(shortName=ifelse(phewas_code =='696.4','Psoriasis',shortName)) %>%
    mutate(shortName=ifelse(phewas_code =='696.4','Psoriasis',shortName)) %>%
    mutate(shortName=ifelse(phewas_code =='7187','Hemoglobin',shortName)) %>%
    mutate(shortName=ifelse(phewas_code =='362.1','Retinopathy',shortName)) %>%
    mutate(shortName = ifelse(phewas_code =='313','Pervasive Development Disorder',shortName)) %>%
  mutate(shortName = ifelse(phewas_code =='313.1','ADHD',shortName)) %>%
  mutate(shortName = ifelse(phewas_code =='COST','Cost',shortName)) %>%
  mutate(shortName = ifelse(phewas_code =='CNT','Comorbids',shortName)) %>%
  mutate(shortName = ifelse(phewas_code =='984','Lead Poisoning',shortName)) %>%
    mutate(shortName = ifelse(phewas_code =='367','Low Vision',shortName)) %>%
      mutate(shortName = ifelse(phewas_code =='769','Nonallopathic Lesion',shortName)) %>%
        mutate(shortName = ifelse(phewas_code =='465','Respiratory Infections',shortName)) %>%
      mutate(shortName = ifelse(phewas_code =='870','Head Wound',shortName)) %>%
      mutate(shortName = ifelse(phewas_code =='381','Otitis Media',shortName)) %>%
  mutate(shortName = ifelse(phewas_code =='474.2','Chronic Tonsillitis',shortName)) %>%
    mutate(shortName = ifelse(phewas_code =='524','Dentofacial Anomalies',shortName)) %>%
    mutate(shortName = ifelse(phewas_code =='930','Food Allergy',shortName)) %>%
    mutate(shortName = ifelse(phewas_code =='939','Atopic/Contact Dermatitis',shortName)) %>%
mutate(shortName = ifelse(phewas_code =='706','Sebaceous Gland',shortName)) %>%
  mutate(shortName = ifelse(phewas_code =='726','Peripheral Enthesopathies',shortName)) %>%
  mutate(shortName = ifelse(phewas_code =='859','Complication Due to Implant',shortName)) %>%
  mutate(shortName = ifelse(phewas_code =='345','Epilepsy/recurrent seizures',shortName)) %>%
  mutate(shortName = ifelse(phewas_code =='758','Chromosomal anomalies/genetic disorders',shortName)) %>%
mutate(shortName = ifelse(phewas_code =='747','Cardiac/circulatory congenital anomalies',shortName)) %>%
  mutate(shortName = ifelse(phewas_code =='656.2','Respiratory Condition Newborn',shortName)) %>%
  mutate(shortName = ifelse(phewas_code =='656','Perinatal Condition Newborn',shortName)) 

Functional Domain Data frames

phewas_functional_domain_map_quant_trait <- quant_traits_subset %>% 
  inner_join(.,quant_pheno_description, by='phewas_code') %>%
  select(phewas_code, phewas_description=Description) %>% 
mutate(functional_domain = case_when(phewas_code == 'CNT' ~ 'PheWAS Comorbids',
                                     phewas_code == 'COST' ~ 'Avg. Monthly Cost',
                                     TRUE ~ 'Quantitative'))


phewas_functional_domain_map <- phewas_functional_domain_map %>% bind_rows(.,phewas_functional_domain_map_quant_trait)


phewas_functional_domain_map_all <- phewas_functional_domain_map %>% 
  select(phewas_code, phewas_description) %>% mutate(functional_domain = 'All Traits') %>% unique()

phewas_functional_domain_map <- phewas_functional_domain_map %>% bind_rows(.,phewas_functional_domain_map_all)



allphewas_both_functional_domain <- allphewas_both %>% 
  inner_join(.,phewas_functional_domain_map, by=c('phewas_code', 'phewas_description'))


allphewas_both_zipcode_functional_domain <- allphewas_both_zipcode %>% 
  inner_join(.,phewas_functional_domain_map, by=c('phewas_code', 'phewas_description'))


rm(phewas_functional_domain_map_quant_trait,phewas_functional_domain_map_all)

Analysis Overview

Number of Twin Pairs

Total Number of Phenotypes

totalPheno <- nrow(allphewas_both)

totalPheno
## [1] 561

Number of Twin Pairs by Pair Type

SE for p(mz|ss)

countTwin <- demographic_information %>% group_by(genderPairType) %>% tally() %>%  spread(.,genderPairType, n) %>%
  mutate(type = 'binary') %>% mutate(binYOB = 'All')


countTwin_birth <- demographic_information %>% 
  mutate(binYOB = cut(yearT1, breaks=c(1985,1996,2006,2016), dig.lab=4, include.lowest=TRUE, right=FALSE ) ) %>% group_by(binYOB,genderPairType) %>% tally() %>% spread(.,genderPairType, n) %>% ungroup()


countTwin_birth <- countTwin_birth %>% mutate(type='binary')


countTwin_all <- countTwin %>% bind_rows(.,countTwin_birth)
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
countTwin_all <- countTwin_all %>% mutate(SS=FF+MM) %>% mutate(OS = MF)



YearOfBirth <- readRDS('~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/analysis/quant_raw_data/revised/quant_pheno_YearOfBirthCount.rds')




count_quant_binYOB <- YearOfBirth %>% filter(phewas_code == '7187') %>% filter(cohort == 'subset') %>% mutate(binYOB = cut(yearBirth, breaks=c(1985,1996,2006,2016),include.lowest = TRUE,dig.lab=4, right=FALSE)) %>% 
  group_by(binYOB, genderConfig, cohort) %>% summarise(n=sum(numPairs)) %>%ungroup()  %>% select(-cohort) %>% spread(.,genderConfig,n) %>% mutate(type = 'quant')



count_quant_all <- YearOfBirth %>% filter(phewas_code == '7187') %>% filter(cohort == 'subset') %>% 
  group_by(genderConfig, cohort) %>% summarise(n=sum(numPairs)) %>%ungroup()  %>% select(-cohort) %>% spread(.,genderConfig,n) %>% mutate(type = 'quant') %>% mutate(binYOB = 'All')



count_everything <- countTwin_all %>% 
  bind_rows(.,count_quant_all,count_quant_binYOB) %>% 
  select(-MM,-MF,-FF) %>% mutate(n=SS+OS ) %>% 
  mutate(pss=SS/n,pmz=1-2*(OS/n)) %>% 
  mutate(p=pmz/pss) %>%
  mutate(var_p = (n*OS)/(SS^3)) %>% mutate(p_SE = sqrt(var_p)) %>%
  mutate(p_low = p-1.96*p_SE, p_high = p+1.96*p_SE) %>% select(-pss,-pmz,-var_p)
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
count_everything <- round_df(count_everything,3)

DT::datatable(count_everything)

Median Number of ICD codes

## # A tibble: 1 x 3
##   median  IQ25  IQ75
##    <dbl> <dbl> <dbl>
## 1     23    12    42
## # A tibble: 3 x 4
##   genderPairType median  IQ25  IQ75
##   <chr>           <dbl> <dbl> <dbl>
## 1 FF                 23    12    41
## 2 MF                 24    13    44
## 3 MM                 22    11    41

Median Number of Age Start year codes

## # A tibble: 1 x 3
##   median  IQ25  IQ75
##    <dbl> <dbl> <dbl>
## 1      7     3    13
## # A tibble: 3 x 4
##   genderPairType median  IQ25  IQ75
##   <chr>           <dbl> <dbl> <dbl>
## 1 FF                  8     3    14
## 2 MF                  7     2    12
## 3 MM                  8     3    13

Median Number of Month Enrollment

## # A tibble: 1 x 3
##   median  IQ25  IQ75
##    <dbl> <dbl> <dbl>
## 1     60    45    84
## # A tibble: 3 x 4
##   genderPairType median  IQ25  IQ75
##   <chr>           <dbl> <dbl> <dbl>
## 1 FF                 60    45    84
## 2 MF                 60    45    84
## 3 MM                 60    45    84

Distinct Zipcodes

## # A tibble: 1 x 1
##   Unique_Elements
##             <int>
## 1           11666
## # A tibble: 3 x 2
##   genderPairType Unique_Elements
##   <chr>                    <int>
## 1 FF                        7302
## 2 MF                        7466
## 3 MM                        7235

Prevalence Counts

prevalence_aggregate <- readRDS(file='~/Dropbox (HMS)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/analysis/prevalence_data/prevalence_aggregate.rds')


phewas_functional_domain_map_binary_no_all <- phewas_functional_domain_map %>% 
  filter(functional_domain !='Quantitative') %>%
filter(functional_domain !='Avg. Monthly Cost') %>%
filter(functional_domain !='PheWAS Comorbids') %>%
  filter(functional_domain !='All Traits') 



prev_all <- prevalence_aggregate %>% 
            group_by(phewas_code, PheWAS_Indicator) %>% 
            summarise(n=sum(countTwin)) %>%
            spread(PheWAS_Indicator, n, fill=0) %>%
            mutate(prevalence = `1` / (`0` + `1`) ) %>%
            right_join(., phewas_functional_domain_map_binary_no_all, by='phewas_code') %>%
            mutate(category = 'All')



prev_M <- prevalence_aggregate %>% 
            group_by(phewas_code, PheWAS_Indicator, Gender) %>% 
            summarise(n=sum(countTwin)) %>%
            filter(Gender == 'M') %>% select(-Gender) %>%
            spread(PheWAS_Indicator, n, fill=0) %>%
            mutate(prevalence = `1` / (`0` + `1`) ) %>%
            right_join(., phewas_functional_domain_map_binary_no_all, by='phewas_code') %>%
            mutate(category = 'Male')



prev_F <- prevalence_aggregate %>% 
            group_by(phewas_code, PheWAS_Indicator, Gender) %>% 
            summarise(n=sum(countTwin)) %>%
            filter(Gender == 'F') %>% select(-Gender) %>%
            spread(PheWAS_Indicator, n, fill=0) %>%
            mutate(prevalence = `1` / (`0` + `1`) ) %>%
            right_join(., phewas_functional_domain_map_binary_no_all, by='phewas_code') %>%
            mutate(category = 'Female')


prev <- prev_all %>% rbind(.,prev_M) %>% rbind(., prev_F) %>% filter(!is.na(functional_domain ))


rm(prev_all, prev_F, prev_M)



#functional_domain_summary_all <- prev %>% select(phewas_code,prevalence, category) %>% unique() %>% group_by(category) %>%summarise(median = median(prevalence, na.rm = TRUE), IQ25 = quantile(prevalence, .25), IQ75 = quantile(prevalence,.75), numTraits = n_distinct(phewas_code)) 


#functional_domain_summary_all$functional_domain <- 'All'

#functional_domain_summary <- prev %>% group_by(functional_domain, category) %>% summarise(median = median(prevalence, na.rm = TRUE), IQ25 = quantile(prevalence, .25), IQ75 = quantile(prevalence,.75), numTraits = n_distinct(phewas_code))

#functional_domain_summary <- functional_domain_summary %>% bind_rows(.,functional_domain_summary_all)

#DT::datatable(functional_domain_summary) %>% 
#  formatSignif('median',2) %>% 
#  formatSignif('IQ25',2) %>% 
#  formatSignif('IQ75',2)

#DT::datatable(functional_domain_summary %>% select(functional_domain) %>% unique())


DT::datatable(prev)

Boxplots of Prevalence by Functional Category

plot_fig <- ggplot(prev, aes(functional_domain, prevalence)) +
  geom_boxplot(aes(colour=category), outlier.alpha = 0.4) + 
   scale_y_log10() +
  theme(axis.text.x=element_text(angle=45, size = 8),
        axis.title.x = element_text(vjust=10)) +
 ylab('Prevalence') + xlab('Functional Domain') +
  theme(legend.position="bottom",legend.box = "horizontal")




ggsave(plot_fig, file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/prevalence_functional_category.png', height=3.5, width=6)

plot_fig

Map of Twin Pairs and SES/Population Density Distribution

map_prevalence_data <- readRDS(all,file = '~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/maps/map_prevalence_data.rds')

pp <- ggplot() +
  geom_polygon(data = map_prevalence_data, 
               aes(x = long, y = lat, group = group, fill = numPairs), 
               color = "black", size = 0.25) + 
  coord_map() +
  labs(fill = "Number of Twin Pairs") + xlim(-140,-66) + theme_map() 

#pp


df_Pop <- readRDS(file='~/Dropbox (HMS)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/revised_analysis/SES_Analysis/popDensity_twin_ACS.rds')

df_SES <- readRDS(file='~/Dropbox (HMS)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/revised_analysis/SES_Analysis/SES_twin_ACS.rds')



popDensityPlot <- ggplot(df_Pop, aes(x=densityBin, y=normalizedPop)) + 
  geom_bar(stat='identity', aes(fill = cohort))  + 
  facet_wrap(~cohort,nrow = 1) +
  theme(axis.text.x = element_text(angle = 60, size=1), 
        axis.title.x = element_text(size=8),
        axis.title.y = element_text(size=8)) + 
  xlab("Log of Population Per Square Mile Area") + 
  ylab("% Total Population")




sesDensityPlot <- ggplot(df_SES, aes(x=SES_bin, y=normalizedPop)) + 
  geom_bar(stat='identity', aes(fill = cohort))  + 
  facet_wrap(~cohort,nrow = 1) +
  theme(axis.text.x = element_text(angle = 60, size=1), 
        axis.title.x = element_text(size=8),
        axis.title.y = element_text(size=8)) + 
  xlab("Depravity Index Score") + ylab("% Total Population") 




figure1a <- ggarrange(popDensityPlot,sesDensityPlot,labels = c( 'b)','c)'),  
                      common.legend=TRUE, ncol = 2, legend = 'bottom',
                      font.label = list(size = 10))

figure1 <- ggarrange(pp,figure1a,labels = c( "a)", ''),  common.legend=FALSE, nrow = 2, ncol = 1,legend = 'top',
                     font.label = list(size = 10))

#figure1


ggsave(figure1,file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/figure1.png')
## Saving 7 x 5 in image

Correlation between Depravity Index and Median Family Income for 500 Cities Data

PCLoadings_500Cities <- readRDS(file='~/Dropbox (HMS)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/revised_analysis/data/PCLoadings_500Cities.rds')


cor_fit_deprav_income <- cor.test(-PCLoadings_500Cities$PC1,PCLoadings_500Cities$medianincome)


cor_fit_deprav_income
## 
##  Pearson's product-moment correlation
## 
## data:  -PCLoadings_500Cities$PC1 and PCLoadings_500Cities$medianincome
## t = 269.61, df = 26565, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.8525273 0.8589647
## sample estimates:
##       cor 
## 0.8557791

Meta-Analysis

Setup

all_traits_df <- data.frame(functional_domain = 'All Traits', 
                            r_MZ = 0.636, 
                            r_MZ_SE = 0.002, 
                            r_DZ=0.339,
                            r_DZ_SE = 0.003,
                            h2 = 0.488,
                            h2_SE = 0.005,
                            c2 = 0.174,
                            c2_SE = 0.004,
                            h2_corr = 0.593,
                            h2_corr_SE = 0.008,
                            c2_corr = 0.042,
                            c2_corr_SE = 0.007,
                            r_MZ_nstudies = NA ,
                            r_MZ_ntraits = 9568,
                            r_DZ_nstudies = NA,
                            r_DZ_ntraits = 5220,
                            h2_nstudies = NA,
                            h2_ntraits = 2929,
                            c2_nstudies = NA,
                            c2_ntraits = 2771)



functional_domain_df <- readRDS(file='~/Dropbox (HMS)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/analysis/match_phewas_mapping/functional_domain/functional_domain.rds')


functional_domain_df <- functional_domain_df %>% rbind(.,all_traits_df)


functional_domain_df <-   functional_domain_df %>% 
  filter(functional_domain != 'Socialinteractions') %>%
  filter(functional_domain != 'Socialvalues') %>%
  filter(functional_domain != 'Cell') %>%
  filter(functional_domain != 'Activities') %>%
  filter(functional_domain != 'Nutritional') %>%
  filter(functional_domain != 'Mortality') %>%
filter(functional_domain != 'Muscular')

h2 meta-analysis

zz <- allphewas_both_functional_domain %>% 
  filter(phewas_code != 'COST') %>%
  filter(phewas_code != 'CNT') %>% 
  split(.$functional_domain) %>% purrr::map(~rma(yi=h2_liab,sei=h2_liab_SE,data=., method='DL'))
## Warning in rma(yi = h2_liab, sei = h2_liab_SE, data = ., method = "DL"):
## Studies with NAs omitted from model fitting.

## Warning in rma(yi = h2_liab, sei = h2_liab_SE, data = ., method = "DL"):
## Studies with NAs omitted from model fitting.

## Warning in rma(yi = h2_liab, sei = h2_liab_SE, data = ., method = "DL"):
## Studies with NAs omitted from model fitting.
phewas_b_h2 <- zz %>% purrr::map(summary)  %>%  map_df(~.$b[1]) %>% gather(functional_domain, h2_liab,1:ncol(.))

phewas_se_h2 <- zz %>% purrr::map(summary) %>% map_df("se") %>% gather(functional_domain, h2_liab_SE,1:ncol(.))

phewas_meta_h2_liab_all <- phewas_b_h2 %>% inner_join(.,phewas_se_h2, by='functional_domain') %>% mutate(category = 'Insurance Claims')

names(phewas_meta_h2_liab_all) <- c('functional_domain','h2','h2_SE', 'category')




MaTCH_h2 <- functional_domain_df %>% select(functional_domain, h2_corr, h2_corr_SE) %>% mutate(category = 'MaTCH')

names(MaTCH_h2) <- c('functional_domain','h2','h2_SE', 'category')


metaAll_justMatch_h2 <- phewas_meta_h2_liab_all %>% rbind(.,MaTCH_h2) 


metaAll_justMatch_h2 <- metaAll_justMatch_h2 %>%
  mutate(h2_low = h2-1.96*h2_SE, h2_high = h2+1.96*h2_SE )

DT::datatable(round_df(metaAll_justMatch_h2, digits=3))
rm(zz, phewas_b_h2, phewas_se_h2, MaTCH_h2)

c2 meta-analysis

zz <- allphewas_both_functional_domain %>% 
  filter(phewas_code != 'COST') %>%
  filter(phewas_code != 'CNT') %>% 
  split(.$functional_domain) %>% purrr::map(~rma(yi=c2_liab,sei=c2_liab_SE,data=., method='DL'))
## Warning in rma(yi = c2_liab, sei = c2_liab_SE, data = ., method = "DL"):
## Studies with NAs omitted from model fitting.

## Warning in rma(yi = c2_liab, sei = c2_liab_SE, data = ., method = "DL"):
## Studies with NAs omitted from model fitting.

## Warning in rma(yi = c2_liab, sei = c2_liab_SE, data = ., method = "DL"):
## Studies with NAs omitted from model fitting.
phewas_b <- zz %>% purrr::map(summary)  %>%  map_df(~.$b[1]) %>% gather(functional_domain, c2_liab,1:ncol(.))

phewas_se <- zz %>% purrr::map(summary) %>% map_df("se") %>% gather(functional_domain, c2_liab_SE,1:ncol(.))

phewas_meta_c2_liab_all <- phewas_b %>% inner_join(.,phewas_se, by='functional_domain') %>% mutate(category = 'Insurance Claims')

names(phewas_meta_c2_liab_all) <- c('functional_domain','c2','c2_SE', 'category')




MaTCH <- functional_domain_df %>% select(functional_domain, c2_corr, c2_corr_SE) %>% mutate(category = 'MaTCH')

names(MaTCH) <- c('functional_domain','c2','c2_SE', 'category')


metaAll_justMatch_c2 <- phewas_meta_c2_liab_all %>% rbind(.,MaTCH) 


metaAll_justMatch_c2 <- metaAll_justMatch_c2 %>%
  mutate(c2_low = c2-1.96*c2_SE, c2_high = c2+1.96*c2_SE )

DT::datatable(round_df(metaAll_justMatch_c2, digits=3))
rm(zz, phewas_b, phewas_se, MaTCH)

e2 meta-analysis

zz <- allphewas_both_functional_domain %>% 
  filter(phewas_code != 'COST') %>%
  filter(phewas_code != 'CNT') %>% 
  split(.$functional_domain) %>% purrr::map(~rma(yi=e2,sei=e2_SE,data=., method='DL'))
## Warning in rma(yi = e2, sei = e2_SE, data = ., method = "DL"): Studies with
## NAs omitted from model fitting.

## Warning in rma(yi = e2, sei = e2_SE, data = ., method = "DL"): Studies with
## NAs omitted from model fitting.

## Warning in rma(yi = e2, sei = e2_SE, data = ., method = "DL"): Studies with
## NAs omitted from model fitting.
phewas_b <- zz %>% purrr::map(summary)  %>%  map_df(~.$b[1]) %>% gather(functional_domain, e2,1:ncol(.))

phewas_se <- zz %>% purrr::map(summary) %>% map_df("se") %>% gather(functional_domain, e2_SE,1:ncol(.))

phewas_meta_e2_liab_all <- phewas_b %>% inner_join(.,phewas_se, by='functional_domain') %>% mutate(category = 'Insurance Claims')

names(phewas_meta_e2_liab_all) <- c('functional_domain','e2','e2_SE', 'category')






rm(zz, phewas_b, phewas_se)

rss meta-analysis

zz <- allphewas_both_functional_domain %>% 
  filter(phewas_code != 'COST') %>%
  filter(phewas_code != 'CNT') %>% 
  split(.$functional_domain) %>% purrr::map(~rma(yi=rliabss,sei=rliabss_SE,data=., method='DL'))
## Warning in rma(yi = rliabss, sei = rliabss_SE, data = ., method = "DL"):
## Studies with NAs omitted from model fitting.

## Warning in rma(yi = rliabss, sei = rliabss_SE, data = ., method = "DL"):
## Studies with NAs omitted from model fitting.

## Warning in rma(yi = rliabss, sei = rliabss_SE, data = ., method = "DL"):
## Studies with NAs omitted from model fitting.
phewas_b <- zz %>% purrr::map(summary)  %>%  map_df(~.$b[1]) %>% gather(functional_domain, rliabss,1:ncol(.))

phewas_se <- zz %>% purrr::map(summary) %>% map_df("se") %>% gather(functional_domain, rliabss_SE,1:ncol(.))

phewas_meta_rss_liab_all <- phewas_b %>% inner_join(.,phewas_se, by='functional_domain') %>% mutate(category = 'Insurance Claims')

names(phewas_meta_rss_liab_all) <- c('functional_domain','rSS','rSS_SE', 'category')



output_rliabSS <- phewas_meta_rss_liab_all %>%
  mutate(rSS_low = rSS-1.96*rSS_SE, rSS_high = rSS+1.96*rSS_SE )

DT::datatable(round_df(output_rliabSS, digits=3))

ros meta-analysis

zz <- allphewas_both_functional_domain %>% 
  filter(phewas_code != 'COST') %>%
  filter(phewas_code != 'CNT') %>% filter(rliabos_SE > .00000001) %>%
  split(.$functional_domain) %>% purrr::map(~rma(yi=rliabos,sei=rliabos_SE,data=., method='DL'))


phewas_b <- zz %>% purrr::map(summary)  %>%  map_df(~.$b[1]) %>% gather(functional_domain, rliabos,1:ncol(.))

phewas_se <- zz %>% purrr::map(summary) %>% map_df("se") %>% gather(functional_domain, rliabos_SE,1:ncol(.))

phewas_meta_ros_liab_all <- phewas_b %>% inner_join(.,phewas_se, by='functional_domain') %>% mutate(category = 'Insurance Claims')

names(phewas_meta_ros_liab_all) <- c('functional_domain','rOS','rOS_SE', 'category')


output_rliabOS <- phewas_meta_ros_liab_all %>%
  mutate(rOS_low = rOS-1.96*rOS_SE, rOS_high = rOS+1.96*rOS_SE )

DT::datatable(round_df(output_rliabOS, digits=3))

h2/c2 MaTCH Plot

metaAll_justMatch_h2$functional_domain <- factor(metaAll_justMatch_h2$functional_domain, 
                                                 levels = c('All Traits',
                                                            'Aging',
                                                            'Cardiovascular',
                                                            'Cognitive',
                                                            'Connective tissue',
                                                            'Dermatological',
                                                            'Developmental',
                                                            'Ear,Nose,Throat',
                                                            'Endocrine',
                                                            'Environment',
                                                            'Gastrointestinal',
                                                            'Hematological',
                                                            'Immunological',
                                                            'Infection',
                                                            'Metabolic',
                                                            'Neoplasms',
                                                            'Neurological',
                                                            'Ophthalmological',
                                                            'Psychiatric',
                                                            'Reproduction',
                                                            'Respiratory',
                                                            'Skeletal',
                                                            'Quantitative',
                                                            'Avg. Monthly Cost',
                                                            'PheWAS Comorbids'))



metaAll_justMatch_c2$functional_domain <- factor(metaAll_justMatch_c2$functional_domain, 
                                                 levels = c('All Traits',
                                                            'Aging',
                                                            'Cardiovascular',
                                                            'Cognitive',
                                                            'Connective tissue',
                                                            'Dermatological',
                                                            'Developmental',
                                                            'Ear,Nose,Throat',
                                                            'Endocrine',
                                                            'Environment',
                                                            'Gastrointestinal',
                                                            'Hematological',
                                                            'Immunological',
                                                            'Infection',
                                                            'Metabolic',
                                                            'Neoplasms',
                                                            'Neurological',
                                                            'Ophthalmological',
                                                            'Psychiatric',
                                                            'Reproduction',
                                                            'Respiratory',
                                                            'Skeletal',
                                                            'Quantitative',
                                                            'Avg. Monthly Cost',
                                                            'PheWAS Comorbids'))






h2_liab_ggplot <- ggplot(metaAll_justMatch_h2, aes(y=h2, x=functional_domain, colour=category)) + 
  geom_point(position = position_dodge(width = .5)) +  
  geom_errorbar(aes(ymin=h2-1.96*h2_SE, ymax=h2+1.96*h2_SE),position = position_dodge(width = 0.5)) + 
  coord_flip() +  
  theme(axis.text=element_text(size=8))  + 
  scale_x_discrete(labels = function(x) str_wrap(x, width = 80)) +
  theme(axis.title = element_text()) + ylab('Heritability (h2)') + xlab('Functional Domain')


ggsave(h2_liab_ggplot,file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/h2_liab_comparison_match.png', height=3.5, width=6)
## Warning: Removed 3 rows containing missing values (geom_point).
## Warning: Removed 3 rows containing missing values (geom_errorbar).
c2_liab_ggplot <- ggplot(metaAll_justMatch_c2, aes(y=c2, x=functional_domain, colour=category)) + 
  geom_point(position = position_dodge(width = .5)) +  
  geom_errorbar(aes(ymin=c2-1.96*c2_SE, ymax=c2+1.96*c2_SE),position = position_dodge(width = 0.5)) + 
  coord_flip() +     
  theme(axis.text=element_text(size=8))  + 
  scale_x_discrete(labels = function(x) str_wrap(x, width = 80)) +
  theme(axis.title = element_text()) + ylab('Shared Environmental Variance (c2)') + xlab('Functional Domain')



ggsave(c2_liab_ggplot,file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/c2_liab_comparison_match.png', height=3.5, width=6)
## Warning: Removed 3 rows containing missing values (geom_point).

## Warning: Removed 3 rows containing missing values (geom_errorbar).
h2_c2_MaTCH <- ggarrange(h2_liab_ggplot,c2_liab_ggplot,labels = c( "a)", 'b)'), nrow = 2, ncol = 1, common.legend=TRUE,legend='bottom' )
## Warning: Removed 3 rows containing missing values (geom_point).

## Warning: Removed 3 rows containing missing values (geom_errorbar).
## Warning: Removed 3 rows containing missing values (geom_point).
## Warning: Removed 3 rows containing missing values (geom_errorbar).
## Warning: Removed 3 rows containing missing values (geom_point).
## Warning: Removed 3 rows containing missing values (geom_errorbar).
#h2_c2_MaTCH

ggsave(h2_c2_MaTCH,file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/h2_c2_MaTCH.png', height = 10, width=8)

Barplot by functional group

phewas_meta_h2_liab_all$stat <- 'h2'
phewas_meta_c2_liab_all$stat <- 'c2'
#phewas_meta_e2_liab_all$stat <- 'e2'
phewas_meta_rss_liab_all$stat <- 'rOS'
phewas_meta_ros_liab_all$stat <- 'rSS'

a <- phewas_meta_h2_liab_all %>% select(functional_domain, statistic = h2, stat)
b <- phewas_meta_c2_liab_all %>% select(functional_domain, statistic = c2, stat)
#c <- phewas_meta_e2_liab_all %>% select(functional_domain, statistic = e2, stat)
d <- phewas_meta_ros_liab_all %>% select(functional_domain, statistic = rOS, stat)
e <- phewas_meta_rss_liab_all %>% select(functional_domain, statistic = rSS, stat)

z_meta_all <- a %>% bind_rows(.,b,d,e)

rm(a,b,d,e)

z_meta_all$stat <- factor(z_meta_all$stat, levels=c('rOS','rSS', 'h2','c2'))


z_meta_all$functional_domain <- factor(z_meta_all$functional_domain, 
                                                 levels = c('All Traits',
                                                            'Aging',
                                                            'Cardiovascular',
                                                            'Cognitive',
                                                            'Connective tissue',
                                                            'Dermatological',
                                                            'Developmental',
                                                            'Ear,Nose,Throat',
                                                            'Endocrine',
                                                            'Environment',
                                                            'Gastrointestinal',
                                                            'Hematological',
                                                            'Immunological',
                                                            'Infection',
                                                            'Metabolic',
                                                            'Neoplasms',
                                                            'Neurological',
                                                            'Ophthalmological',
                                                            'Psychiatric',
                                                            'Reproduction',
                                                            'Respiratory',
                                                            'Skeletal',
                                                            'Quantitative',
                                                            'Avg. Monthly Cost',
                                                            'PheWAS Comorbids'))



barchart_fn_domain <- ggplot(z_meta_all, aes(functional_domain, statistic, fill=stat)) + 
  geom_bar(stat="identity", position="dodge", width = 0.75, colour="black") +
  theme(axis.text.x=element_text(angle=45), text=element_text(size=8) ) +
    theme(axis.title = element_text()) +
ylab('Statistic Value') + xlab('Functional Domain') + labs(fill='Statistic') 

barchart_fn_domain

ggsave(barchart_fn_domain, file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/barchart_fn_domain.png')
## Saving 7 x 5 in image

Barplot with environmental information

meta-analysis h2 with environmental variables

zz <- allphewas_both_zipcode_functional_domain %>% 
  filter(phewas_code != 'COST') %>%
  filter(phewas_code != 'CNT') %>% 
  split(.$functional_domain) %>% purrr::map(~rma(yi=h2_liab,sei=h2_liab_SE,data=., method='DL'))
## Warning in rma(yi = h2_liab, sei = h2_liab_SE, data = ., method = "DL"):
## Studies with NAs omitted from model fitting.

## Warning in rma(yi = h2_liab, sei = h2_liab_SE, data = ., method = "DL"):
## Studies with NAs omitted from model fitting.
phewas_b <- zz %>% purrr::map(summary)  %>%  map_df(~.$b[1]) %>% gather(functional_domain, h2_liab,1:ncol(.))

phewas_se <- zz %>% purrr::map(summary) %>% map_df("se") %>% gather(functional_domain, h2_liab_SE,1:ncol(.))

environment_meta_h2_liab_all_environment <- phewas_b %>% inner_join(.,phewas_se, by='functional_domain') %>% mutate(category = 'Insurance Claims')

names(environment_meta_h2_liab_all_environment) <- c('functional_domain','h2','h2_SE', 'category')


environment_meta_h2_liab_all_environment <- environment_meta_h2_liab_all_environment %>% 
  mutate(h2_low=h2-1.96*h2_SE,h2_high=h2+1.96*h2_SE )


DT::datatable(round_df(environment_meta_h2_liab_all_environment, digits=3))
rm(zz,phewas_b, phewas_se)

meta-analysis c2 with environmental variables

zz <- allphewas_both_zipcode_functional_domain %>% 
  filter(phewas_code != 'COST') %>%
  filter(phewas_code != 'CNT') %>% 
  split(.$functional_domain) %>% purrr::map(~rma(yi=c2_liab,sei=c2_liab_SE,data=., method='DL'))
## Warning in rma(yi = c2_liab, sei = c2_liab_SE, data = ., method = "DL"):
## Studies with NAs omitted from model fitting.

## Warning in rma(yi = c2_liab, sei = c2_liab_SE, data = ., method = "DL"):
## Studies with NAs omitted from model fitting.
phewas_b <- zz %>% purrr::map(summary)  %>%  map_df(~.$b[1]) %>% gather(functional_domain, c2_liab,1:ncol(.))

phewas_se <- zz %>% purrr::map(summary) %>% map_df("se") %>% gather(functional_domain, c2_liab_SE,1:ncol(.))

environment_meta_c2_liab_all_environment <- phewas_b %>% inner_join(.,phewas_se, by='functional_domain') %>% mutate(category = 'Insurance Claims')

names(environment_meta_c2_liab_all_environment) <- c('functional_domain','c2','c2_SE', 'category')

environment_meta_c2_liab_all_environment <- environment_meta_c2_liab_all_environment %>% mutate(c2_low=c2-1.96*c2_SE,c2_high=c2+1.96*c2_SE )

DT::datatable(round_df(environment_meta_c2_liab_all_environment, digits=3))
rm(zz,phewas_b, phewas_se)

meta-analysis e2 with environmental variables

zz <- allphewas_both_zipcode_functional_domain %>% 
  filter(phewas_code != 'COST') %>%
  filter(phewas_code != 'CNT') %>% 
  split(.$functional_domain) %>% purrr::map(~rma(yi=e2,sei=e2_SE,data=., method='DL'))
## Warning in rma(yi = e2, sei = e2_SE, data = ., method = "DL"): Studies with
## NAs omitted from model fitting.

## Warning in rma(yi = e2, sei = e2_SE, data = ., method = "DL"): Studies with
## NAs omitted from model fitting.

## Warning in rma(yi = e2, sei = e2_SE, data = ., method = "DL"): Studies with
## NAs omitted from model fitting.

## Warning in rma(yi = e2, sei = e2_SE, data = ., method = "DL"): Studies with
## NAs omitted from model fitting.

## Warning in rma(yi = e2, sei = e2_SE, data = ., method = "DL"): Studies with
## NAs omitted from model fitting.

## Warning in rma(yi = e2, sei = e2_SE, data = ., method = "DL"): Studies with
## NAs omitted from model fitting.
phewas_b <- zz %>% purrr::map(summary)  %>%  map_df(~.$b[1]) %>% gather(functional_domain, e2,1:ncol(.))

phewas_se <- zz %>% purrr::map(summary) %>% map_df("se") %>% gather(functional_domain, e2_SE,1:ncol(.))

environment_meta_e2_liab_all_environment <- phewas_b %>% inner_join(.,phewas_se, by='functional_domain') %>% mutate(category = 'Insurance Claims')

names(environment_meta_e2_liab_all_environment) <- c('functional_domain','e2','e2_SE', 'category')


environment_meta_e2_liab_all_environment <- environment_meta_e2_liab_all_environment %>% 
  mutate(e2_low=e2-1.96*e2_SE,e2_high=e2+1.96*e2_SE )


DT::datatable(round_df(environment_meta_e2_liab_all_environment, digits=3))
rm(zz,phewas_b, phewas_se)

meta-analysis rliabincome with environmental variables

zz <- allphewas_both_zipcode_functional_domain %>% 
  filter(phewas_code != 'COST') %>%
  filter(phewas_code != 'CNT') %>% 
  split(.$functional_domain) %>% purrr::map(~rma(yi=rliabincome,sei=rliabincome_SE,data=., method='DL'))
## Warning in rma(yi = rliabincome, sei = rliabincome_SE, data = ., method =
## "DL"): Studies with NAs omitted from model fitting.

## Warning in rma(yi = rliabincome, sei = rliabincome_SE, data = ., method =
## "DL"): Studies with NAs omitted from model fitting.

## Warning in rma(yi = rliabincome, sei = rliabincome_SE, data = ., method =
## "DL"): Studies with NAs omitted from model fitting.

## Warning in rma(yi = rliabincome, sei = rliabincome_SE, data = ., method =
## "DL"): Studies with NAs omitted from model fitting.

## Warning in rma(yi = rliabincome, sei = rliabincome_SE, data = ., method =
## "DL"): Studies with NAs omitted from model fitting.
phewas_b <- zz %>% purrr::map(summary)  %>%  map_df(~.$b[1]) %>% gather(functional_domain, rliabincome,1:ncol(.))

phewas_se <- zz %>% purrr::map(summary) %>% map_df("se") %>% gather(functional_domain, rliabincome_SE,1:ncol(.))

environment_meta_rliabincome_all_environment <- phewas_b %>% inner_join(.,phewas_se, by='functional_domain') %>% mutate(category = 'Insurance Claims')

names(environment_meta_rliabincome_all_environment) <- c('functional_domain','var_income','var_income_SE', 'category')



environment_meta_rliabincome_all_environment <- environment_meta_rliabincome_all_environment %>% 
  mutate(var_income_low=var_income-1.96*var_income_SE,var_income_high=var_income+1.96*var_income_SE )


DT::datatable(round_df(environment_meta_rliabincome_all_environment, digits=3))
rm(zz,phewas_b, phewas_se)

meta-analysis rliabaqi with environmental variables

zz <- allphewas_both_zipcode_functional_domain %>% 
  filter(phewas_code != 'COST') %>%
  filter(phewas_code != 'CNT') %>% 
  split(.$functional_domain) %>% purrr::map(~rma(yi=rliabaqi,sei=rliabaqi_SE,data=., method='DL'))


phewas_b <- zz %>% purrr::map(summary)  %>%  map_df(~.$b[1]) %>% gather(functional_domain, rliabaqi,1:ncol(.))

phewas_se <- zz %>% purrr::map(summary) %>% map_df("se") %>% gather(functional_domain, rliabaqi_SE,1:ncol(.))

environment_meta_rliabaqi_all_environment <- phewas_b %>% inner_join(.,phewas_se, by='functional_domain') %>% mutate(category = 'Insurance Claims')

names(environment_meta_rliabaqi_all_environment) <- c('functional_domain','var_aqi','var_aqi_SE', 'category')



environment_meta_rliabaqi_all_environment <- environment_meta_rliabaqi_all_environment %>% 
  mutate(var_aqi_low=var_aqi-1.96*var_aqi_SE, var_aqi_high=var_aqi+1.96*var_aqi_SE )


DT::datatable(round_df(environment_meta_rliabaqi_all_environment, digits=3))
rm(zz,phewas_b, phewas_se)

meta-analysis rliabtemp with environmental variables

zz <- allphewas_both_zipcode_functional_domain %>% 
  filter(phewas_code != 'COST') %>%
  filter(phewas_code != 'CNT') %>% 
  split(.$functional_domain) %>% purrr::map(~rma(yi=rliabtemp,sei=rliabtemp_SE,data=., method='DL'))
## Warning in rma(yi = rliabtemp, sei = rliabtemp_SE, data = ., method =
## "DL"): Studies with NAs omitted from model fitting.

## Warning in rma(yi = rliabtemp, sei = rliabtemp_SE, data = ., method =
## "DL"): Studies with NAs omitted from model fitting.
phewas_b <- zz %>% purrr::map(summary)  %>%  map_df(~.$b[1]) %>% gather(functional_domain, rliabtemp,1:ncol(.))

phewas_se <- zz %>% purrr::map(summary) %>% map_df("se") %>% gather(functional_domain, rliabtemp_SE,1:ncol(.))

environment_meta_rliabtemp_all_environment <- phewas_b %>% inner_join(.,phewas_se, by='functional_domain') %>% 
  mutate(category = 'Insurance Claims')

names(environment_meta_rliabtemp_all_environment) <- c('functional_domain','var_temp','var_temp_SE', 'category')


environment_meta_rliabtemp_all_environment <- environment_meta_rliabtemp_all_environment %>% 
  mutate(var_temp_low=var_temp-1.96*var_temp_SE, var_temp_high=var_temp+1.96*var_temp_SE )


DT::datatable(round_df(environment_meta_rliabtemp_all_environment, digits=3))
rm(zz,phewas_b, phewas_se)
DT::datatable(allphewas_both_zipcode %>% filter(rliabincome.pvalue_FDR <= 0.05) %>% summarise(n=n())   )
DT::datatable(allphewas_both_zipcode %>% filter(rliabaqi.pvalue_FDR <= 0.05) %>% summarise(n=n())   )
DT::datatable(allphewas_both_zipcode %>% filter(rliabtemp.pvalue_FDR <= 0.05) %>% summarise(n=n())   )
var_environment_confidence_interval <- allphewas_both_zipcode %>% 
  select(phewas_code, phewas_description, rliabincome, rliabincome_SE, rliabaqi, rliabaqi_SE, rliabtemp, rliabtemp_SE) %>%
  mutate(rliabincome_low=rliabincome-1.96*rliabincome_SE, rliabincome_high=rliabincome+1.96*rliabincome_SE) %>%
  mutate(rliabaqi_low=rliabaqi-1.96*rliabaqi_SE, rliabaqi_high=rliabaqi+1.96*rliabaqi_SE) %>%
  mutate(rliabtemp_low=rliabtemp-1.96*rliabtemp_SE, rliabtemp_high=rliabtemp+1.96*rliabtemp_SE) %>%
  select(phewas_code, phewas_description, rliabincome,rliabincome_low,rliabincome_high,rliabaqi,rliabaqi_low,rliabaqi_high,rliabtemp,rliabtemp_low,rliabtemp_high)


DT::datatable(round_df(var_environment_confidence_interval,digits = 3))

Barplot by functional group - environment

environment_meta_h2_liab_all_environment$stat <- 'h2'
environment_meta_c2_liab_all_environment$stat <- 'c2'
#environment_meta_e2_liab_all_environment$stat <- 'e2'
environment_meta_rliabincome_all_environment$stat <- 'SES'
environment_meta_rliabaqi_all_environment$stat <- 'AQI'
environment_meta_rliabtemp_all_environment$stat <- 'temperature'


a <- environment_meta_h2_liab_all_environment %>% select(functional_domain, statistic = h2,se=h2_SE, stat)
b <- environment_meta_c2_liab_all_environment %>% select(functional_domain, statistic = c2, se=c2_SE, stat)
#c <- environment_meta_e2_liab_all_environment %>% select(functional_domain, statistic = e2, stat)
d <- environment_meta_rliabincome_all_environment %>% select(functional_domain, statistic = var_income,se=var_income_SE, stat)
e <- environment_meta_rliabaqi_all_environment %>% select(functional_domain, statistic = var_aqi,se=var_aqi_SE, stat)
f <- environment_meta_rliabtemp_all_environment %>% select(functional_domain, statistic = var_temp,se=var_temp_SE, stat)


z_meta_all <- a %>% bind_rows(.,b,d,e,f)

rm(a,b,d,e,f)

z_meta_all$stat <- factor(z_meta_all$stat, levels=c('h2','c2','SES','AQI','temperature'))


z_meta_all$functional_domain <- factor(z_meta_all$functional_domain, 
                                                 levels = c('All Traits',
                                                            'Aging',
                                                            'Cardiovascular',
                                                            'Cognitive',
                                                            'Connective tissue',
                                                            'Dermatological',
                                                            'Developmental',
                                                            'Ear,Nose,Throat',
                                                            'Endocrine',
                                                            'Environment',
                                                            'Gastrointestinal',
                                                            'Hematological',
                                                            'Immunological',
                                                            'Infection',
                                                            'Metabolic',
                                                            'Neoplasms',
                                                            'Neurological',
                                                            'Ophthalmological',
                                                            'Psychiatric',
                                                            'Reproduction',
                                                            'Respiratory',
                                                            'Skeletal',
                                                            'Quantitative',
                                                            'Avg. Monthly Cost',
                                                            'PheWAS Comorbids'))


size_text <- 4

barchart_fn_domain_environment <- ggplot(z_meta_all, aes(functional_domain, statistic, fill=stat)) + 
  geom_bar(stat="identity", position="dodge", width = 0.75, colour="black", size=.2) +
  theme(legend.position=c(.25,.75),legend.direction = "horizontal") +     
  theme(axis.text=element_text(size=size_text))  +
      theme(axis.text.x = element_text(angle = 30, size=size_text, hjust = .75), 
        axis.text.y = element_text(size=size_text),
        axis.title.x = element_text(size=size_text,vjust=4),
        axis.title.y = element_text(size=size_text,vjust=-1)) +
  theme(legend.text=element_text(size=size_text), legend.title = element_text(size=size_text)) +
  theme(legend.key.size = unit(.2, "cm")) +
    geom_errorbar(aes(ymin=statistic-1.96*se, 
                    ymax=statistic+1.96*se),
                width=.5,size=.2,
                position=position_dodge(.9))



barchart_fn_domain_environment

#ggsave(barchart_fn_domain_environment, file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/barchart_fn_domain_environment.png')

braden_df_meta_analytic_estimates_environment <- z_meta_all




ggsave(barchart_fn_domain_environment, file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/supp_figure_5.jpg',dpi='print',
       device = 'jpeg', width = 3,height= 2)

Barplot by functional group - just environment

environment_meta_rliabincome_all_environment$stat <- 'SES'
environment_meta_rliabaqi_all_environment$stat <- 'AQI'
environment_meta_rliabtemp_all_environment$stat <- 'temperature'


d <- environment_meta_rliabincome_all_environment %>% select(functional_domain, statistic = var_income, se=var_income_SE, stat)
e <- environment_meta_rliabaqi_all_environment %>% select(functional_domain, statistic = var_aqi, se=var_aqi_SE, stat)
f <- environment_meta_rliabtemp_all_environment %>% select(functional_domain, statistic = var_temp,se=var_temp_SE, stat)


z_meta_all <- d %>% bind_rows(.,e,f)

rm(d,e,f)

z_meta_all$stat <- factor(z_meta_all$stat, levels=c('SES','AQI','temperature'))


z_meta_all$functional_domain <- factor(z_meta_all$functional_domain, 
                                                 levels = c('All Traits',
                                                            'Aging',
                                                            'Cardiovascular',
                                                            'Cognitive',
                                                            'Connective tissue',
                                                            'Dermatological',
                                                            'Developmental',
                                                            'Ear,Nose,Throat',
                                                            'Endocrine',
                                                            'Environment',
                                                            'Gastrointestinal',
                                                            'Hematological',
                                                            'Immunological',
                                                            'Infection',
                                                            'Metabolic',
                                                            'Neoplasms',
                                                            'Neurological',
                                                            'Ophthalmological',
                                                            'Psychiatric',
                                                            'Reproduction',
                                                            'Respiratory',
                                                            'Skeletal',
                                                            'Quantitative',
                                                            'Avg. Monthly Cost',
                                                            'PheWAS Comorbids'))



#barchart_fn_domain_just_environment <- ggplot(z_meta_all, aes(functional_domain, statistic, fill=stat)) + 
#  geom_bar(stat="identity", position="dodge", width = 0.75, colour="black") +
#  theme(axis.text.x=element_text(angle=45, size = 8),
#        axis.title.x = element_text(vjust=10)) +
#ylab('Statistic Value') + xlab('Functional Domain') + labs(fill='Statistic') +
#  theme(legend.position="bottom")

#barchart_fn_domain_just_environment

#ggsave(barchart_fn_domain_just_environment, file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/barchart_fn_domain_just_environment.png')




size_text <- 4

barchart_fn_domain_just_environment <- ggplot(z_meta_all, aes(functional_domain, statistic, fill=stat)) + 
  geom_bar(stat="identity", position="dodge", width = 0.75, colour="black", size=.2) +
  theme(legend.position=c(.3,.75),legend.direction = "vertical") +     
  theme(axis.text=element_text(size=size_text))  +
      theme(axis.text.x = element_text(angle = 30, size=size_text, hjust = .75), 
        axis.text.y = element_text(size=size_text),
        axis.title.x = element_text(size=size_text,vjust=4),
        axis.title.y = element_text(size=size_text,vjust=-1)) +
  theme(legend.text=element_text(size=size_text), legend.title = element_text(size=size_text)) +
  theme(legend.key.size = unit(.2, "cm")) +
    geom_errorbar(aes(ymin=statistic-1.96*se, 
                    ymax=statistic+1.96*se),
                width=.5,size=.2,
                position=position_dodge(.9))



barchart_fn_domain_just_environment

ggsave(barchart_fn_domain_just_environment, file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/supp_figure_11.jpg',dpi='print',
       device = 'jpeg', width = 3,height= 2)

Number of Phenotypes in each Category with FDR and h2/c2 and number of additive traits

h2

h2_meta <- metaAll_justMatch_h2 %>% filter(category == 'Insurance Claims') %>%
  mutate(h2_low = h2 - 1.96*h2_SE, h2_high= h2+ 1.96*h2_SE) %>% ungroup()


h2_fdr <- allphewas_both_functional_domain %>% group_by(functional_domain) %>% summarise(count_n=n(), fdr_threshold=sum(pvalue_h2_FDR <= 0.05)) %>% mutate(pct_FDR = (fdr_threshold/count_n)*100.0) %>% ungroup()


h2_meta_fdr <- h2_meta %>% inner_join(.,h2_fdr, by='functional_domain')
## Warning: Column `functional_domain` joining factor and character vector,
## coercing into character vector
h2_meta_fdr <- round_df(h2_meta_fdr, digits=3)

DT::datatable(h2_meta_fdr)

c2

c2_meta <- metaAll_justMatch_c2 %>% filter(category == 'Insurance Claims') %>%
  mutate(c2_low = c2 - 1.96*c2_SE, c2_high= c2+ 1.96*c2_SE) %>% ungroup()


c2_fdr <- allphewas_both_functional_domain %>% group_by(functional_domain) %>% summarise(count_n=n(), fdr_threshold=sum(pvalue_c2_FDR <= 0.05)) %>% mutate(pct_FDR = (fdr_threshold/count_n)*100.0) %>% ungroup()


c2_meta_fdr <- c2_meta %>% inner_join(.,c2_fdr, by='functional_domain')
## Warning: Column `functional_domain` joining factor and character vector,
## coercing into character vector
c2_meta_fdr <- round_df(c2_meta_fdr, digits=3)

DT::datatable(c2_meta_fdr)

e2

e2_meta <- phewas_meta_e2_liab_all %>% filter(category == 'Insurance Claims') %>%
  mutate(e2_low = e2 - 1.96*e2_SE, e2_high= e2+ 1.96*e2_SE) %>% ungroup()


e2_fdr <- allphewas_both_functional_domain %>% group_by(functional_domain) %>% summarise(count_n=n(), fdr_threshold=sum(e2.pvalue_FDR <= 0.05)) %>% mutate(pct_FDR = (fdr_threshold/count_n)*100.0) %>% ungroup()


e2_meta_fdr <- e2_meta %>% inner_join(.,e2_fdr, by='functional_domain')


e2_meta_fdr <- round_df(e2_meta_fdr, digits=3)

DT::datatable(e2_meta_fdr)

SES variance component

#environment_meta_rliabincome_all_environment

#e2_meta <- environment_meta_rliabincome_all_environment %>% filter(category == 'Insurance Claims') %>%
#  mutate(var_income_low = var_income - 1.96*e2_SE, e2_high= e2+ 1.96*e2_SE) %>% ungroup()


SES_fdr <- allphewas_both_zipcode_functional_domain %>% 
  filter(!is.na(rliabincome.pvalue_FDR)) %>%
  group_by(functional_domain) %>% 
  summarise(count_n=n(), 
            fdr_threshold=sum(rliabincome.pvalue_FDR <= 0.05))  %>% ungroup() %>%
  mutate(pctThreshold=(fdr_threshold/561)*100)


SES_fdr <- environment_meta_rliabincome_all_environment %>% full_join(.,SES_fdr, by='functional_domain')


SES_fdr <- round_df(SES_fdr, digits=3)

DT::datatable(SES_fdr)

AQI variance component

#environment_meta_rliabincome_all_environment

#e2_meta <- environment_meta_rliabincome_all_environment %>% filter(category == 'Insurance Claims') %>%
#  mutate(var_income_low = var_income - 1.96*e2_SE, e2_high= e2+ 1.96*e2_SE) %>% ungroup()


AQI_fdr <- allphewas_both_zipcode_functional_domain %>% 
  filter(!is.na(rliabaqi.pvalue_FDR)) %>%
  group_by(functional_domain) %>% 
  summarise(count_n=n(), 
            fdr_threshold=sum(rliabaqi.pvalue_FDR <= 0.05))  %>% ungroup() %>%
  mutate(pctThreshold=(fdr_threshold/561)*100)


AQI_fdr <- environment_meta_rliabaqi_all_environment %>% full_join(.,AQI_fdr, by='functional_domain')


AQI_fdr <- round_df(AQI_fdr, digits=3)

DT::datatable(AQI_fdr)

Temperature variance component

#environment_meta_rliabincome_all_environment

#e2_meta <- environment_meta_rliabincome_all_environment %>% filter(category == 'Insurance Claims') %>%
#  mutate(var_income_low = var_income - 1.96*e2_SE, e2_high= e2+ 1.96*e2_SE) %>% ungroup()


temp_fdr <- allphewas_both_zipcode_functional_domain %>% 
  filter(!is.na(rliabtemp.pvalue_FDR)) %>%
  group_by(functional_domain) %>% 
  summarise(count_n=n(), 
            fdr_threshold=sum(rliabtemp.pvalue_FDR <= 0.05))  %>% ungroup() %>%
  mutate(pctThreshold=(fdr_threshold/561)*100)


temp_fdr <- environment_meta_rliabtemp_all_environment %>% full_join(.,temp_fdr, by='functional_domain')


temp_fdr <- round_df(temp_fdr, digits=3)

DT::datatable(temp_fdr)

Comparison of old var comp environment estimates with new var comp environment estimates

phewas <- read.table("~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/analysis/phewasAllDescription.csv", header=TRUE)


allphewas_binary_zipcode_old <- read_delim("~/Dropbox (HMS)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/manuscript_data/twin_binary_zipcode_old.csv", 
    " ", escape_double = FALSE, col_names = FALSE, 
    trim_ws = TRUE)
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   X1 = col_character(),
##   X2 = col_integer(),
##   X3 = col_integer(),
##   X4 = col_integer(),
##   X5 = col_integer(),
##   X6 = col_integer(),
##   X7 = col_integer(),
##   X8 = col_integer(),
##   X9 = col_integer(),
##   X75 = col_integer(),
##   X76 = col_integer(),
##   X77 = col_integer(),
##   X78 = col_integer(),
##   X79 = col_integer()
## )
## See spec(...) for full column specifications.
names(allphewas_binary_zipcode_old) <- readRDS('~/Dropbox (HMS)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/manuscript_data/names_binary_zipcode_old.rds')


allphewas_binary_zipcode_old <- allphewas_binary_zipcode_old %>%
  mutate(h2_liab_SE_se = ifelse(phewas_code == '984',h2_SE,h2_liab_SE_se)) %>%
  mutate(rliabss_SE_se = ifelse(phewas_code == '984',r_SS_SE,rliabss_SE_se)) %>%
  mutate(c2_liab_SE_se = ifelse(phewas_code == '984',c2_SE,c2_liab_SE_se)) %>%
    mutate(rliabos_SE_se = ifelse(phewas_code == '984',r_OS_SE,rliabos_SE_se)) 






names(allphewas_binary_zipcode_old) <- sub("_se", "", names(allphewas_binary_zipcode_old))



allphewas_binary_zipcode_old <- allphewas_binary_zipcode_old %>% inner_join(phewas, ., by='phewas_code') 
## Warning: Column `phewas_code` joining factor and character vector, coercing
## into character vector
comparison_var_comp_old <- allphewas_binary_zipcode_old %>%
  select(phewas_code, phewas_description,
         rliabincome_old =rliabincome, rliabincome_SE_old=rliabincome_SE, 
         rliabaqi_old=rliabaqi, rliabaqi_SE_old=rliabaqi_SE, 
         rliabtemp_old=rliabtemp, rliabtemp_SE_old=rliabtemp_SE)


comparison_var_comp_new <- allphewas_both_zipcode %>%
  select(phewas_code, 
         rliabincome, rliabincome_SE, 
         rliabaqi, rliabaqi_SE, 
         rliabtemp, rliabtemp_SE)


comparison_var_comp_both <- comparison_var_comp_old %>%
  inner_join(.,comparison_var_comp_new, by='phewas_code') %>%
  mutate(diff_income = rliabincome - rliabincome_old, diff_income_SE = sqrt(rliabincome_SE^2+rliabincome_SE_old^2)) %>%
  mutate(diff_aqi = rliabaqi - rliabaqi_old, diff_aqi_SE = sqrt(rliabaqi_SE^2+rliabaqi_SE_old^2)) %>%
mutate(diff_temp = rliabtemp - rliabtemp_old, diff_temp_SE = sqrt(rliabtemp_SE^2+rliabtemp_SE_old^2)) %>%
mutate(diff_income.zscore=diff_income/diff_income_SE) %>%
  mutate(diff_income.pvalue=2*pnorm(-abs(diff_income.zscore))) %>%
  mutate(diff_income.pvalue_FDR = p.adjust(diff_income.pvalue, method='BY')) %>%
mutate(diff_aqi.zscore=diff_aqi/diff_aqi_SE) %>%
  mutate(diff_aqi.pvalue=2*pnorm(-abs(diff_aqi.zscore))) %>%
  mutate(diff_aqi.pvalue_FDR = p.adjust(diff_aqi.pvalue, method='BY')) %>%
mutate(diff_temp.zscore=diff_temp/diff_temp_SE) %>%
  mutate(diff_temp.pvalue=2*pnorm(-abs(diff_temp.zscore))) %>%
  mutate(diff_temp.pvalue_FDR = p.adjust(diff_temp.pvalue, method='BY'))
  




ggplot(comparison_var_comp_both , aes(rliabincome_old,rliabincome)) + geom_point()  + 
  geom_abline(slope = 1, intercept = 0) 

ggplot(comparison_var_comp_both , aes(rliabaqi_old,rliabaqi)) + geom_point()  + 
  geom_abline(slope = 1, intercept = 0) 

ggplot(comparison_var_comp_both , aes(rliabtemp_old,rliabtemp)) + geom_point()  + 
  geom_abline(slope = 1, intercept = 0) 

## Income FDR and pi_0

qval_income <- qvalue(comparison_var_comp_both$diff_income.pvalue)

summary(qval_income)
## 
## Call:
## qvalue(p = comparison_var_comp_both$diff_income.pvalue)
## 
## pi0: 1   
## 
## Cumulative number of significant calls:
## 
##           <1e-04 <0.001 <0.01 <0.025 <0.05 <0.1  <1
## p-value        4     12    23     31    40   59 500
## q-value        1      1     4      8    13   18  89
## local FDR      1      1     2      4     5    9  33
comparison_var_comp_both %>% filter(diff_income.pvalue_FDR <= 0.05) %>% tally()
##   n
## 1 3
comparison_var_comp_both %>% filter(diff_income.pvalue_FDR <= 0.05) %>% select(phewas_description)
##                           phewas_description
## 1 Otitis media and Eustachian tube disorders
## 2                               Otitis media
## 3                            Acute sinusitis
comparison_var_comp_both %>% filter(diff_income < 0) %>% tally()
##     n
## 1 260
comparison_var_comp_both %>% filter(diff_income > 0) %>% tally()
##     n
## 1 243
comparison_var_comp_both %>% filter(diff_income == 0) %>% tally()
##    n
## 1 49
income_rank_var <- comparison_var_comp_both %>% 
  select(phewas_code, phewas_description, rliabincome_old, rliabincome, rliabincome_SE, rliabincome_SE_old) %>%
  mutate(oldRank=rank(-rliabincome_old), newRank=rank(-rliabincome)) %>%
  mutate(rliabincome_old_low=rliabincome_old-1.96*rliabincome_SE_old,
         rliabincome_old_high=rliabincome_old+1.96*rliabincome_SE_old,
         rliabincome_low=rliabincome-1.96*rliabincome_SE,
         rliabincome_high=rliabincome+1.96*rliabincome_SE)
  

DT::datatable(round_df(income_rank_var, digits = 3))

AQI FDR and pi_0

qval_AQI <- qvalue(comparison_var_comp_both$diff_aqi.pvalue)

summary(qval_AQI)
## 
## Call:
## qvalue(p = comparison_var_comp_both$diff_aqi.pvalue)
## 
## pi0: 1   
## 
## Cumulative number of significant calls:
## 
##           <1e-04 <0.001 <0.01 <0.025 <0.05 <0.1  <1
## p-value        5      9    15     23    25   44 479
## q-value        1      2     6      9     9   14  25
## local FDR      0      1     2      3     8    8  15
comparison_var_comp_both %>% filter(diff_aqi.pvalue_FDR <= 0.05) %>% tally()
##   n
## 1 2
comparison_var_comp_both %>% filter(diff_aqi.pvalue_FDR <= 0.05) %>% select(phewas_description)
##   phewas_description
## 1    Acute sinusitis
## 2  Allergic rhinitis
comparison_var_comp_both %>% filter(diff_aqi < 0) %>% tally()
##     n
## 1 205
comparison_var_comp_both %>% filter(diff_aqi > 0) %>% tally()
##     n
## 1 274
comparison_var_comp_both %>% filter(diff_aqi == 0) %>% tally()
##    n
## 1 73
aqi_rank_var <- comparison_var_comp_both %>% 
  select(phewas_code, phewas_description, rliabaqi_old, rliabaqi, rliabaqi_SE, rliabaqi_SE_old) %>%
  mutate(oldRank=rank(-rliabaqi_old), newRank=rank(-rliabaqi)) %>%
  mutate(rliabaqi_old_low=rliabaqi_old-1.96*rliabaqi_SE_old,
         rliabaqi_old_high=rliabaqi_old+1.96*rliabaqi_SE_old,
         rliabaqi_low=rliabaqi-1.96*rliabaqi_SE,
         rliabaqi_high=rliabaqi+1.96*rliabaqi_SE)

DT::datatable(round_df(aqi_rank_var, digits = 3))

Temp FDR and pi_0

qval_temp <- qvalue(comparison_var_comp_both$diff_temp.pvalue)

summary(qval_temp)
## 
## Call:
## qvalue(p = comparison_var_comp_both$diff_temp.pvalue)
## 
## pi0: 1   
## 
## Cumulative number of significant calls:
## 
##           <1e-04 <0.001 <0.01 <0.025 <0.05 <0.1  <1
## p-value        0      0     2      6     7   14 496
## q-value        0      0     0      0     0    0   0
## local FDR      0      0     0      0     0    0   0
comparison_var_comp_both %>% filter(diff_temp.pvalue_FDR <= 0.05) %>% tally()
##   n
## 1 0
comparison_var_comp_both %>% filter(diff_temp < 0) %>% tally()
##     n
## 1 257
comparison_var_comp_both %>% filter(diff_temp > 0) %>% tally()
##     n
## 1 240
comparison_var_comp_both %>% filter(diff_temp == 0) %>% tally()
##    n
## 1 55
temp_rank_var <- comparison_var_comp_both %>% 
  select(phewas_code, phewas_description, rliabtemp_old, rliabtemp, rliabtemp_SE, rliabtemp_SE_old) %>%
  mutate(oldRank=rank(-rliabtemp_old), newRank=rank(-rliabtemp)) %>%
  mutate(rliabtemp_old_low=rliabtemp_old-1.96*rliabtemp_SE_old,
         rliabtemp_old_high=rliabtemp_old+1.96*rliabtemp_SE_old,
         rliabtemp_low=rliabtemp-1.96*rliabtemp_SE,
         rliabtemp_high=rliabtemp+1.96*rliabtemp_SE)

DT::datatable(round_df(temp_rank_var, digits = 3))

Number of Additive Traits

############### Subfunction ############################
## Compute the estimate of pi0 for a fixed value of B ##
########################################################
FixedB <- function(p, B)
{
 ## Input:
 #p: a vector of p-values
 #B: an integer, the interval [0,1] is divided into B equal-length intervals
 
 ## Output:
 #pi0: an estimate of the proportion of true null hypotheses
 
 m <- length(p) 
 t <- seq(0,1,length=B+1)   # equally spaced points in the interval [0,1]
    
 NB <- rep(0,B)         # number of p-values greater than t_i   
 NBaverage <- rep(0,B)      # average number of p-values in each of the (B-i+1) small intervals on [t_i,1]
 NS <- rep(0,B)             # number of p-values in the interval [t_i, t_(i+1)]
 pi <- rep(0,B)         # estimates of pi0  
 for(i in 1:B)
 {
  NB[i] <- length(p[p>=t[i]])
  NBaverage[i] <- NB[i]/(B-(i-1))
  NS[i] <- length(p[p>=t[i]]) - length(p[p>=t[i+1]])
  pi[i] <- NB[i]/(1-t[i])/m
 }

 i <- min(which(NS <= NBaverage))  # Find change point i
 pi0 <- min(1, mean(pi[(i-1):B]))          # average estiamte of pi0
 return(pi0)
}


############## main function ###########################
## (1) Compute the estimate of pi0                    ##
## (2) Apply the adaptive FDR controlling procedure   ##
########################################################
AverageEstimate <- function(p=NULL, Bvector=c(5, 10, 20, 50, 100), alpha=0.05)
{
 ## Input:
 #p: a vector of p-values
 #Bvector: a vector of integer values where the interval [0,1] is divided into B equal-length intervals
 #         When Bvector is an integer, number of intervals is consider as fixed. For example Bvector = 10;
 #         When Bvector is a vector, bootstrap method is used to choose an optimal value of B
 #alpha: FDR signficance level so that the FDR is controlled below alpha
 #Numboot: number of bootstrap samples

 ## Output:
 #pi0: an estimate of the proportion of true null hypotheses
 #significant: a vector of indicator variables; 
 #             it is 1 if the corresponding p-value is significant
 #         it is 0 if the corresponding p-value is not significant
    
 # check if the p-values are valid
 if (min(p)<0 || max(p)>1) print("Error: p-values are not in the interval of [0,1]")
 m <- length(p)         # Total number p-values
 
 Bvector <- as.integer(Bvector)  # Make sure Bvector is a vector of integers
 
 #Bvector has to be bigger than 1
 if(min(Bvector) <=1) print ("Error: B has to be bigger than 1")
 
 ######## Estimate pi0 ########
 if(length(Bvector) == 1)        # fixed number of numbers, i.e., B is fixed
 {
   pi0 <- AverageEstimateFixedB(p, Bvector)
 }
 else
 {
   Numboot <- 100
   OrigPi0Est <- rep(0, length(Bvector))
   for (Bloop in 1:length(Bvector))
   {
    OrigPi0Est[Bloop] <- FixedB(p, Bvector[Bloop])
   }
 
   BootResult <- matrix(0, nrow=Numboot, ncol=length(Bvector)) # Contains the bootstrap results
 
   for(k in 1:Numboot)
   {
     p.boot <- sample(p, m, replace=TRUE)    # bootstrap sample 
     for (Bloop in 1:length(Bvector))
     {
      BootResult[k,Bloop] <- FixedB(p.boot, Bvector[Bloop])
     }  
    }

   MeanPi0Est <- mean(OrigPi0Est)             # avearge of pi0 estimates over the range of Bvector
   MSEestimate <- rep(0, length(Bvector))     # compute mean-squared error
   for (i in 1:length(Bvector))
   {
     MSEestimate[i] <- (OrigPi0Est[i]- MeanPi0Est)^2
     for (k in 1:Numboot)
    {
      MSEestimate[i] <- MSEestimate[i]+1/Numboot*(BootResult[k,i] - OrigPi0Est[i])^2    
    }
   }
   pi0 <- OrigPi0Est[MSEestimate==min(MSEestimate)]
 }  # end of else           
 
 
 ######## Apply the adaptive FDR controlling procedure ########
 
 sorted.p <- sort(p)        # sorted p-values
 order.p <- order(p)        # order of the p-values
 m0 <- pi0*m                    # estimate of the number of true null       
 i <- m

 crit <- i/m0*alpha

 while (sorted.p[i] > crit )
  { 
    i <- i-1
    crit <- i/m0*alpha
    if (i==1) break
   }
  K <- i
  if (K==1 & sorted.p[K] <= 1/m0*alpha) K <- 1
  if (K==1 & sorted.p[K] > 1/m0*alpha)  K <- 0

  significant <- rep(0,m)       # indicator of significance of the p-values
  if (K > 0) significant[order.p[1:K]] <- 1

  result <- list(pi0=pi0, significant=significant)
  result
}
DT::datatable(allphewas_both %>% filter(c2_liab.pvalue <= 0.05) %>% tally())
DT::datatable(allphewas_both %>% filter(c2_liab.pvalue > 0.05) %>% tally())
m_sig <- allphewas_both_functional_domain %>% filter(c2_liab.pvalue <= 0.05) %>% group_by(functional_domain) %>% tally() %>% mutate(sig_num=n) %>% select(-n)

m_non_sig <- allphewas_both_functional_domain %>% filter(c2_liab.pvalue > 0.05) %>% group_by(functional_domain) %>% tally() %>% mutate(insig_num=n) %>% select(-n)


m_both <- m_non_sig %>% left_join(., m_sig, by='functional_domain')


 m_both[is.na(m_both)] <- 0
 
 m_both$pct_additive <-( m_both$insig_num / (m_both$sig_num + m_both$insig_num) ) * 100
 
  m_both$total <-( (m_both$sig_num + m_both$insig_num) ) 
 
 DT::datatable(round_df(m_both, digits=3)) 
 dataPvalue <- allphewas_both %>% select(c2_liab.pvalue) %>% filter(complete.cases(.))

 
 
 library(qvalue)
 
 qobj <- qvalue(dataPvalue$c2_liab.pvalue)


 summary(qobj)
## 
## Call:
## qvalue(p = dataPvalue$c2_liab.pvalue)
## 
## pi0: 0.5260485   
## 
## Cumulative number of significant calls:
## 
##           <1e-04 <0.001 <0.01 <0.025 <0.05 <0.1  <1
## p-value      140    163   207    226   258  285 559
## q-value      133    154   194    220   253  285 559
## local FDR    116    131   150    170   183  200 366
 AverageEstimate(dataPvalue$c2_liab.pvalue )
## $pi0
## [1] 0.4822072
## 
## $significant
##   [1] 1 1 1 1 1 0 1 0 0 1 1 1 1 1 0 0 0 1 0 0 1 1 1 1 1 1 1 0 1 1 1 0 1 1 0
##  [36] 0 0 0 0 0 0 0 1 0 1 1 0 0 1 1 0 1 1 1 1 1 0 1 1 1 0 0 0 1 1 1 1 0 0 0
##  [71] 0 0 0 1 0 1 1 0 1 1 1 1 0 1 1 0 0 0 0 0 0 0 0 1 1 0 1 0 0 0 0 0 0 0 0
## [106] 0 1 0 1 0 0 0 1 0 0 0 0 0 0 0 1 1 0 1 0 1 1 0 1 1 0 0 0 0 0 0 0 0 0 0
## [141] 0 0 1 0 0 0 1 1 1 0 0 1 1 0 0 0 0 0 1 0 0 1 1 1 1 0 1 1 1 1 1 1 1 1 1
## [176] 1 0 1 1 1 0 1 1 0 1 1 1 1 0 0 1 0 1 0 1 1 1 0 1 1 1 0 0 0 1 1 1 1 1 1
## [211] 0 0 1 0 0 0 1 1 0 1 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 1 0 1 0 0
## [246] 1 1 1 1 1 1 0 0 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 0 0 0 1 1 1 1 1 0 0 0 0
## [281] 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 0 0 0 0 0 0 0 0 1 0 0 0 1 0
## [316] 0 0 0 0 1 0 0 0 1 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1
## [351] 1 1 1 0 1 1 1 1 0 0 0 0 0 0 1 0 1 0 1 1 1 1 1 0 0 0 0 0 0 0 1 1 0 0 0
## [386] 0 1 1 1 0 0 1 0 0 0 0 0 0 1 1 0 0 0 1 0 0 0 0 1 1 0 1 0 1 1 1 0 1 1 1
## [421] 1 1 1 1 1 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 1 0 1 0 1 0 0 0 1 0 0 0 0 0
## [456] 0 0 0 1 1 1 0 1 1 1 0 0 0 0 0 0 1 1 0 0 0 0 0 1 0 0 0 1 1 0 0 1 1 1 1
## [491] 0 0 0 1 1 0 0 0 1 1 0 1 0 1 0 0 0 1 0 0 0 0 0 1 1 1 0 1 0 1 0 0 0 0 1
## [526] 1 1 0 0 0 0 1 0 1 0 0 1 1 0 1 1 0 0 0 0 1 1 1 0 0 0 0 0 1 1 1 1 1 1

FDR Correction Phenotypes

DT::datatable(allphewas_both %>% filter( pvalue_h2_FDR <= 0.05) %>% tally() / 561)
DT::datatable(allphewas_both %>% filter(pvalue_h2_FDR > 0.05) %>% tally() / 561)
DT::datatable(allphewas_both %>% filter( pvalue_h2_FDR <= 0.05) %>% tally())
DT::datatable(allphewas_both %>% filter(pvalue_h2_FDR > 0.05) %>% tally())
DT::datatable(allphewas_both %>% filter( pvalue_c2_FDR <= 0.05) %>% tally())
DT::datatable(allphewas_both %>% filter(pvalue_c2_FDR > 0.05) %>% tally())
DT::datatable(round_df(allphewas_both %>% filter( pvalue_h2_FDR <= 0.05) %>% tally() / 561, digits=3))
DT::datatable(round_df(allphewas_both %>% filter( pvalue_c2_FDR <= 0.05) %>% tally() / 561, digits=3))

Bonferroni Correction Phenotypes

DT::datatable(allphewas_both %>% filter( pvalue_h2_bonferroni <= 0.05) %>% tally() / 561)
DT::datatable(allphewas_both %>% filter(pvalue_h2_bonferroni > 0.05) %>% tally() / 561)
DT::datatable(allphewas_both %>% filter( pvalue_h2_bonferroni <= 0.05) %>% tally())
DT::datatable(allphewas_both %>% filter(pvalue_h2_bonferroni > 0.05) %>% tally())
DT::datatable(allphewas_both %>% filter( pvalue_c2_bonferroni <= 0.05) %>% tally())
DT::datatable(allphewas_both %>% filter(pvalue_c2_bonferroni > 0.05) %>% tally())
DT::datatable(round_df(allphewas_both %>% filter( pvalue_h2_bonferroni <= 0.05) %>% tally() / 561, digits=3))
DT::datatable(round_df(allphewas_both %>% filter( pvalue_c2_bonferroni <= 0.05) %>% tally() / 561, digits=3))

FDR Correction beta age

DT::datatable(allphewas_both %>% filter( pvalue_beta_age_FDR <= 0.05) %>% tally())
DT::datatable(round_df(allphewas_both %>% filter( pvalue_beta_age_FDR <= 0.05) %>% tally() / 561, digits=3))

FDR Correction beta gender

DT::datatable(allphewas_both %>% filter( pvalue_beta_gender_FDR <= 0.05) %>% tally())
DT::datatable(round_df(allphewas_both %>% filter( pvalue_beta_gender_FDR <= 0.05) %>% tally() / 561, digits=3))

h2/c2/e2 for all phenotypes

phenotypes_h2_c2_e2 <- allphewas_both %>% select(phewas_code, phewas_description, h2_liab, h2_liab_SE, c2_liab, c2_liab_SE, e2, e2_SE) %>% 
  mutate(h2_low=h2_liab-1.96*h2_liab_SE,h2_high=h2_liab+1.96*h2_liab_SE ) %>%
  mutate(c2_low=c2_liab-1.96*c2_liab_SE,c2_high=c2_liab+1.96*c2_liab_SE ) %>%
  mutate(e2_low=e2-1.96*e2_SE,e2_high=e2+1.96*e2_SE ) %>%
  select(phewas_code, phewas_description, h2_liab,h2_low,h2_high, c2_liab, c2_low,c2_high, e2, e2_low,e2_high)


DT::datatable(round_df(phenotypes_h2_c2_e2, digits=3))

h2/c2/e2 for all phenotypes with p-value

phenotypes_h2_c2_e2_pvalue <- allphewas_both %>% select(phewas_code, phewas_description, h2_liab, h2_liab_SE, 
                                                 c2_liab, c2_liab_SE, e2, e2_SE,
                                                 pvalue_h2_FDR, h2_liab.zscore, pvalue_c2_FDR, c2_liab.zscore, e2.pvalue_FDR, e2.zscore, rliabos, rliabos_SE, rliabss, rliabss_SE) %>% 
  mutate(h2_low=h2_liab-1.96*h2_liab_SE,h2_high=h2_liab+1.96*h2_liab_SE ) %>%
  mutate(c2_low=c2_liab-1.96*c2_liab_SE,c2_high=c2_liab+1.96*c2_liab_SE ) %>%
  mutate(e2_low=e2-1.96*e2_SE,e2_high=e2+1.96*e2_SE ) %>%
  mutate(rliabos_low = rliabos - 1.96* rliabos_SE, rliabos_high = rliabos + 1.96* rliabos_SE  ) %>%
  mutate(rliabss_low = rliabss - 1.96* rliabss_SE, rliabss_high = rliabss + 1.96* rliabss_SE) %>%
  select(phewas_code, phewas_description, h2_liab,h2_low,h2_high, pvalue_h2_FDR, h2_liab.zscore,
         c2_liab, c2_low,c2_high,pvalue_c2_FDR, c2_liab.zscore, 
         e2, e2_low,e2_high, e2.pvalue_FDR, e2.zscore,
         rliabos, rliabos_low, rliabos_high, rliabss, rliabss_low, rliabss_high)


DT::datatable(round_df(phenotypes_h2_c2_e2_pvalue, digits = 3))

Comparison of h2/c2 to published literature

Load Comparison Data

twin_h2_c2_comparison_diff_twin_match <- readRDS(file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/revised_analysis/MaTCHComparisonAnalysis/twin_h2_c2_comparison_diff_twin_match.rds')

Estimate Correlation Data

cor.test(twin_h2_c2_comparison_diff_twin_match$h2_liab, twin_h2_c2_comparison_diff_twin_match$h2_match,method = 'pearson')
## 
##  Pearson's product-moment correlation
## 
## data:  twin_h2_c2_comparison_diff_twin_match$h2_liab and twin_h2_c2_comparison_diff_twin_match$h2_match
## t = 4.633, df = 79, p-value = 1.399e-05
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.2712569 0.6181865
## sample estimates:
##       cor 
## 0.4622291
m <- as.matrix(twin_h2_c2_comparison_diff_twin_match %>% select(h2_liab,h2_liab_SE, h2_match, h2_match_SE))
estimateCorrelation_matrix(1:nrow(m),m)
## [1] 0.8168958
results <- jackknife(1:nrow(m),estimateCorrelation_matrix,m)
results$jack.se
## [1] 0.1652211
results$jack.bias
## [1] 0.06142777

Plot Scatterplot

#ggplot(twin_h2_c2_comparison_diff_twin_match , aes(h2_match,h2_liab)) + geom_point() + 
#    geom_errorbarh(aes(xmin=h2_match-1.96*h2_match_SE, xmax=h2_match+1.96*h2_match_SE),
#                   position = position_dodge(width = 0.01), alpha=.2) +
#      geom_errorbar(aes(ymin=h2_liab-1.96*h2_liab_SE, ymax=h2_liab+1.96*h2_liab_SE),
#                    position = position_dodge(width = 0.01), alpha=.2) + geom_abline(slope = 1, intercept = 0) +
#    geom_smooth(method = "lm", se = TRUE) + xlab('Heritability - Published Studies') + ylab('Heritability - Insurance #Study')

Number of Traits

twin_h2_c2_comparison_diff_twin_match %>%
  mutate(diff=h2_match-h2_liab) %>%
  mutate(higherEstimate=ifelse(diff >=0, 'Published Is Higher', 'Claims is Higher')) %>%
  group_by(higherEstimate) %>%
  tally()
## # A tibble: 2 x 2
##   higherEstimate          n
##   <chr>               <int>
## 1 Claims is Higher       32
## 2 Published Is Higher    49
twinComparison_insurance_published <- twin_h2_c2_comparison_diff_twin_match %>% 
  select(-subchapter, -diff) %>%
  select(subchapterName,h2_insurance=h2_liab, 
         h2_insurance_SE=h2_liab_SE, h2_published=h2_match,
         h2_published_SE=h2_match_SE,source, method)


DT::datatable(twinComparison_insurance_published)
#library(googlesheets)


#boring_ss <- gs_new("published_estimate_comparison", 
#                    ws_title = "published_estimate_comparison", 
#                    input = round_df(twinComparison_insurance_published,digits = 3),
#                    trim = TRUE, verbose = FALSE)

Sibling Analysis

Number of Pairs by Gender

Load Sibling Data

library(readr)
library(data.table)
## 
## Attaching package: 'data.table'
## The following object is masked from 'package:DescTools':
## 
##     %like%
## The following objects are masked from 'package:dplyr':
## 
##     between, first, last
## The following object is masked from 'package:purrr':
## 
##     transpose
allphewas_binary <- read_delim("~/Dropbox (HMS)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/manuscript_data/twin_binary.csv", 
    " ", escape_double = FALSE, col_names = FALSE, 
    trim_ws = TRUE)
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   X1 = col_character(),
##   X2 = col_integer(),
##   X3 = col_integer(),
##   X4 = col_integer(),
##   X5 = col_integer(),
##   X6 = col_integer(),
##   X7 = col_integer(),
##   X8 = col_integer(),
##   X9 = col_integer(),
##   X57 = col_integer(),
##   X58 = col_integer(),
##   X59 = col_integer(),
##   X60 = col_integer(),
##   X61 = col_integer()
## )
## See spec(...) for full column specifications.
names(allphewas_binary) <- readRDS('~/Dropbox (HMS)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/manuscript_data/names_binary.rds')

phewas <- read.table("~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/analysis/phewasAllDescription.csv", header=TRUE)

allphewas_binary <- allphewas_binary %>% 
  mutate(h2_liab_SE_se = ifelse(phewas_code == '984',h2_SE,h2_liab_SE_se)) %>%
  mutate(rliabss_SE_se = ifelse(phewas_code == '984',r_SS_SE,rliabss_SE_se)) %>%
  mutate(c2_liab_SE_se = ifelse(phewas_code == '984',c2_SE,c2_liab_SE_se)) %>%
    mutate(rliabos_SE_se = ifelse(phewas_code == '984',r_OS_SE,rliabos_SE_se)) 


names(allphewas_binary) <- sub("_se", "", names(allphewas_binary))



allphewas_binary <- allphewas_binary %>% inner_join(phewas, ., by='phewas_code') 
## Warning: Column `phewas_code` joining factor and character vector, coercing
## into character vector
allphewas_sibling <- read_delim("~/Dropbox (HMS)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/revised_analysis/data/allphewas_sibling.txt", 
    " ", escape_double = FALSE, col_names = FALSE, trim_ws = TRUE)
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   X1 = col_character(),
##   X2 = col_integer(),
##   X3 = col_integer(),
##   X4 = col_integer(),
##   X5 = col_integer(),
##   X6 = col_integer(),
##   X31 = col_character()
## )
## See spec(...) for full column specifications.
names(allphewas_sibling) <- readRDS('~/Dropbox (HMS)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/revised_analysis/data/names_binary_sibling.rds')

names(allphewas_sibling) <- sub("_se", "", names(allphewas_sibling))


df1 <- allphewas_sibling %>% select(phewas_code, genderPairType,rliabsibling ) %>% spread(genderPairType,rliabsibling) %>%
  select(phewas_code, rliabsibling_SS=SS,rliabsibling_OS=OS)

df2 <- allphewas_sibling %>% select(phewas_code, genderPairType,rliabsibling_SE) %>% spread(genderPairType,rliabsibling_SE) %>%
    select(phewas_code, rliabsibling_SE_SS=SS,rliabsibling_SE_OS=OS)


allphewas_sibling_correlation <- df1 %>% inner_join(.,df2, by='phewas_code')

allphewas_binary_correlation <- allphewas_binary %>% select(phewas_code, phewas_description, rliabss, rliabss_SE,rliabos, rliabos_SE, h2_liab, h2_liab_SE, c2_liab, c2_liab_SE)


allphewas_all_correlation_sibling_twin <- allphewas_binary_correlation %>% left_join(.,allphewas_sibling_correlation, by='phewas_code')


rm(df1,df2)

Comparison of OS sibling correlation vs OS twin correlation

correlation_twinOS_sibOS <- ggplot(allphewas_all_correlation_sibling_twin , aes(rliabos,rliabsibling_OS)) + geom_point() + 
    geom_errorbarh(aes(xmin=rliabos-1.96*rliabos_SE, xmax=rliabos+1.96*rliabos_SE),
                   position = position_dodge(width = 0.01), alpha=.2) +
      geom_errorbar(aes(ymin=rliabsibling_OS-1.96*rliabsibling_SE_OS, ymax=rliabsibling_OS+1.96*rliabsibling_SE_OS),
                    position = position_dodge(width = 0.01), alpha=.2) + geom_abline(slope = 1, intercept = 0) + 
  xlab('Twin Opposite Sex Correlation') + 
  ylab('Sibling Opposite Sex Correlation')

correlation_twinOS_sibOS
## Warning: position_dodge requires non-overlapping x intervals

ggsave(correlation_twinOS_sibOS, file='~/Dropbox (HMS)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/correlation_twinOS_sibOS.png')
## Saving 7 x 5 in image
## Warning: position_dodge requires non-overlapping x intervals
DT::datatable(round_df(allphewas_all_correlation_sibling_twin,digits = 3))

Comparison of OS sibling correlation vs SS twin correlation

correlation_twinOS_sibSS <- ggplot(allphewas_all_correlation_sibling_twin , aes(rliabos,rliabsibling_SS)) + geom_point() + 
    geom_errorbarh(aes(xmin=rliabos-1.96*rliabos_SE, xmax=rliabos+1.96*rliabos_SE),
                   position = position_dodge(width = 0.01), alpha=.2) +
      geom_errorbar(aes(ymin=rliabsibling_SS-1.96*rliabsibling_SE_SS, ymax=rliabsibling_SS+1.96*rliabsibling_SE_SS),
                    position = position_dodge(width = 0.01), alpha=.2) + geom_abline(slope = 1, intercept = 0) + 
  xlab('Twin Opposite Sex Correlation') + 
  ylab('Sibling Same Sex Correlation')

correlation_twinOS_sibSS
## Warning: position_dodge requires non-overlapping x intervals

ggsave(correlation_twinOS_sibSS, file='~/Dropbox (HMS)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/correlation_twinOS_sibSS.png')
## Saving 7 x 5 in image
## Warning: position_dodge requires non-overlapping x intervals

Comparison of Sibling OS vs SS correlation

correlation_sibOS_sibSS <- ggplot(allphewas_sibling_correlation , aes(rliabsibling_SS,rliabsibling_OS)) + geom_point() + 
    geom_errorbarh(aes(xmin=rliabsibling_SS-1.96*rliabsibling_SE_SS, xmax=rliabsibling_SS+1.96*rliabsibling_SE_SS),
                   position = position_dodge(width = 0.01), alpha=.2) +
      geom_errorbar(aes(ymin=rliabsibling_OS-1.96*rliabsibling_SE_OS, ymax=rliabsibling_OS+1.96*rliabsibling_SE_OS),
                    position = position_dodge(width = 0.01), alpha=.2) + geom_abline(slope = 1, intercept = 0) +
  xlab('Sibling Same Sex Correlation') + ylab('Sibling Opposite Sex Correlation')  +
  geom_smooth(method = "lm", se = TRUE)  +
    theme(axis.title.x = element_text(size=12), 
          axis.title.y = element_text(size=12)) 



correlation_sibOS_sibSS
## Warning: position_dodge requires non-overlapping x intervals

ggsave(correlation_sibOS_sibSS, file='~/Dropbox (HMS)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/correlation_sibOS_sibSS.png')
## Saving 7 x 5 in image
## Warning: position_dodge requires non-overlapping x intervals
allphewas_sibling_correlation_dff <- allphewas_sibling_correlation %>%
  mutate(rsib_diff=rliabsibling_SS-rliabsibling_OS)

diff_sib_corOS_corSS <- ggplot(allphewas_sibling_correlation_dff, aes(rsib_diff)) +
  geom_histogram(bins = 60) + xlab("Same Sex Correlation - Opposite Sex Correlation")

diff_sib_corOS_corSS

ggsave(diff_sib_corOS_corSS, file='~/Dropbox (HMS)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/diff_sib_corOS_corSS.png')
## Saving 7 x 5 in image

Number of Correlations which cross 0

allphewas_sibling_correlation <- allphewas_sibling_correlation %>%
  mutate(rsib_diff = rliabsibling_SS-rliabsibling_OS) %>%
  mutate(rsib_diff_SE = sqrt(rliabsibling_SE_SS^2+rliabsibling_SE_OS^2)) %>%
  mutate(rsib_diff_low = rsib_diff - 1.96*rsib_diff_SE,
         rsib_diff_high = rsib_diff + 1.96*rsib_diff_SE) %>%
  mutate(rsib_diff.zscore=rsib_diff/rsib_diff_SE) %>%
  mutate(rsib_diff_SE.pvalue=2*pnorm(-abs(rsib_diff.zscore))) %>%
  mutate(rsib_diff.pvalue_FDR = p.adjust(rsib_diff_SE.pvalue, method='BY')) 

Number of Phenotypes where 95% CI overlaps with 0

DT::datatable(allphewas_sibling_correlation %>% filter(rsib_diff_low < 0 & rsib_diff_high > 0) %>% tally())
DT::datatable(round_df(allphewas_sibling_correlation %>% filter(rsib_diff_low < 0 & rsib_diff_high > 0) %>% tally()/551,digits = 3))

Number of Phenotypes where p-value is less then 0.05

allphewas_sibling_correlation %>% filter(rsib_diff.pvalue_FDR <= 0.05) %>% tally()/561
##           n
## 1 0.3850267
DT::datatable(round_df(allphewas_sibling_correlation %>% filter(rsib_diff.pvalue_FDR <= 0.05) %>% tally()
, digits = 3))
DT::datatable(round_df(allphewas_sibling_correlation %>% filter(rsib_diff.pvalue_FDR <= 0.05) %>% tally()/561
, digits = 3))

Pi_0 Statistics

 qobj_sib <- qvalue(allphewas_sibling_correlation$rsib_diff_SE.pvalue)

summary(qobj_sib)
## 
## Call:
## qvalue(p = allphewas_sibling_correlation$rsib_diff_SE.pvalue)
## 
## pi0: 0.2363052   
## 
## Cumulative number of significant calls:
## 
##           <1e-04 <0.001 <0.01 <0.025 <0.05 <0.1  <1
## p-value      152    190   271    302   336  361 551
## q-value      152    199   298    346   386  442 551
## local FDR    128    152   207    246   276  316 551
 AverageEstimate(allphewas_sibling_correlation$rsib_diff_SE.pvalue )
## $pi0
## [1] 0.2814059
## 
## $significant
##   [1] 0 1 0 1 0 0 1 1 1 1 1 1 1 0 1 1 1 1 1 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1 0
##  [36] 0 0 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1
##  [71] 0 0 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 0 0 1 1 1 1 1 0 1 1 0 0 0 0 0 0
## [106] 1 1 1 1 0 0 1 1 1 1 1 1 1 0 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0
## [141] 0 1 0 1 1 1 0 1 0 0 0 1 0 0 1 1 1 1 1 0 1 1 1 1 0 1 0 1 1 1 1 1 0 1 0
## [176] 0 1 1 1 0 1 1 1 1 1 1 1 0 1 0 1 0 1 0 0 1 1 1 1 1 0 1 1 1 1 1 1 1 0 1
## [211] 0 0 0 0 1 1 1 0 0 0 1 1 1 1 1 1 1 1 1 0 0 1 1 1 1 1 1 0 1 1 1 0 1 0 0
## [246] 1 1 1 0 0 1 0 1 1 1 1 1 1 1 0 0 1 1 0 1 0 1 1 1 1 1 0 0 1 0 1 1 0 0 0
## [281] 0 1 1 1 0 0 0 1 0 1 0 1 1 1 0 0 0 1 1 1 0 0 0 0 1 0 1 1 1 0 1 1 1 0 0
## [316] 1 0 0 1 1 0 0 0 1 0 1 1 1 0 0 1 0 0 0 1 0 1 1 0 1 0 1 1 1 1 0 0 0 0 1
## [351] 0 1 1 1 1 0 0 0 1 1 0 1 1 1 1 1 1 1 1 0 1 1 1 0 1 1 1 1 1 1 1 1 1 1 0
## [386] 1 0 1 1 1 1 1 1 0 0 1 0 1 1 1 1 1 0 1 1 1 0 0 1 1 0 0 0 0 1 0 0 0 0 1
## [421] 1 1 1 1 1 0 1 1 0 1 1 1 1 1 1 1 0 1 1 1 0 0 1 0 1 1 1 1 1 1 0 1 1 1 0
## [456] 0 0 1 1 1 0 1 1 1 1 0 0 0 1 1 0 0 0 0 1 1 1 1 1 1 0 0 1 1 0 1 0 1 1 1
## [491] 1 1 1 1 1 1 0 0 1 1 1 1 0 1 1 1 1 0 1 1 0 1 0 1 0 1 1 1 1 1 1 1 1 1 1
## [526] 0 1 1 1 1 0 1 1 1 0 1 0 1 0 1 0 0 1 1 1 0 1 1 0 0 0

Meta-Analysis

Create Sibling Functional Domain Data Frame

allphewas_both_functional_domain_sibling <- allphewas_all_correlation_sibling_twin %>% 
  inner_join(.,phewas_functional_domain_map, by=c('phewas_code', 'phewas_description'))
## Warning: Column `phewas_description` joining factor and character vector,
## coercing into character vector

rsibSS

zz <- allphewas_both_functional_domain_sibling %>% 
  filter(phewas_code != 'COST') %>%
  filter(phewas_code != 'CNT') %>% 
  split(.$functional_domain) %>% purrr::map(~rma(yi=rliabsibling_SS,sei=rliabsibling_SE_SS,data=., method='DL'))


phewas_b_rsib_SS <- zz %>% purrr::map(summary)  %>%  map_df(~.$b[1]) %>% gather(functional_domain, rliabsibling_SS,1:ncol(.))

phewas_se_rsib_SS <- zz %>% purrr::map(summary) %>% map_df("se") %>% gather(functional_domain, rliabsibling_SE_SS,1:ncol(.))

phewas_meta_rsib_SS_liab_all <- phewas_b_rsib_SS %>% inner_join(.,phewas_se_rsib_SS, by='functional_domain') %>% mutate(category = 'Insurance Claims')

names(phewas_meta_rsib_SS_liab_all) <- c('functional_domain','rsibSS','rsibSS_SE', 'category')


ribSSOverall <- phewas_meta_rsib_SS_liab_all %>%
  mutate(rsibSS_low=rsibSS-1.96*rsibSS_SE, rsibSS_high=rsibSS+1.96*rsibSS_SE )


DT::datatable(round_df(ribSSOverall, digits=3))
#MaTCH_h2 <- functional_domain_df %>% select(functional_domain, h2_corr, h2_corr_SE) %>% mutate(category = 'MaTCH')

#names(MaTCH_h2) <- c('functional_domain','h2','h2_SE', 'category')


#metaAll_justMatch_h2 <- phewas_meta_h2_liab_all %>% rbind(.,MaTCH_h2) 


#metaAll_justMatch_h2 <- metaAll_justMatch_h2 %>%
#  mutate(h2_low = h2-1.96*h2_SE, h2_high = h2+1.96*h2_SE )

#DT::datatable(round_df(metaAll_justMatch_h2, digits=3))

#rm(zz, phewas_b_h2, phewas_se_h2, MaTCH_h2)

rsibOS

zz <- allphewas_both_functional_domain_sibling %>% 
  filter(phewas_code != 'COST') %>%
  filter(phewas_code != 'CNT') %>% 
  split(.$functional_domain) %>% purrr::map(~rma(yi=rliabsibling_OS,sei=rliabsibling_SE_OS,data=., method='DL'))


phewas_b_rsib_OS <- zz %>% purrr::map(summary)  %>%  map_df(~.$b[1]) %>% gather(functional_domain, rliabsibling_OS,1:ncol(.))

phewas_se_rsib_OS <- zz %>% purrr::map(summary) %>% map_df("se") %>% gather(functional_domain, rliabsibling_SE_OS,1:ncol(.))

phewas_meta_rsib_OS_liab_all <- phewas_b_rsib_OS %>% inner_join(.,phewas_se_rsib_OS, by='functional_domain') %>% mutate(category = 'Insurance Claims')

names(phewas_meta_rsib_OS_liab_all) <- c('functional_domain','rsibOS','rsibOS_SE', 'category')


ribOSOverall <- phewas_meta_rsib_OS_liab_all %>%
  mutate(rsibOS_low=rsibOS-1.96*rsibOS_SE, rsibOS_high=rsibOS+1.96*rsibOS_SE )


DT::datatable(round_df(ribOSOverall, digits=3))
#MaTCH_h2 <- functional_domain_df %>% select(functional_domain, h2_corr, h2_corr_SE) %>% mutate(category = 'MaTCH')

#names(MaTCH_h2) <- c('functional_domain','h2','h2_SE', 'category')


#metaAll_justMatch_h2 <- phewas_meta_h2_liab_all %>% rbind(.,MaTCH_h2) 


#metaAll_justMatch_h2 <- metaAll_justMatch_h2 %>%
#  mutate(h2_low = h2-1.96*h2_SE, h2_high = h2+1.96*h2_SE )

#DT::datatable(round_df(metaAll_justMatch_h2, digits=3))

#rm(zz, phewas_b_h2, phewas_se_h2, MaTCH_h2)

Barplot by functional group - including sibling

phewas_meta_h2_liab_all$stat <- 'h2'
phewas_meta_c2_liab_all$stat <- 'c2'
#phewas_meta_e2_liab_all$stat <- 'e2'
phewas_meta_rss_liab_all$stat <- 'rtwinOS'
phewas_meta_ros_liab_all$stat <- 'rtwinSS'

phewas_meta_rsib_SS_liab_all$stat <- 'rsibSS'
phewas_meta_rsib_OS_liab_all$stat <- 'rsibOS'

a <- phewas_meta_h2_liab_all %>% select(functional_domain, statistic = h2, se=h2_SE, stat)
b <- phewas_meta_c2_liab_all %>% select(functional_domain, statistic = c2,se=c2_SE, stat)
#c <- phewas_meta_e2_liab_all %>% select(functional_domain, statistic = e2, stat)
d <- phewas_meta_ros_liab_all %>% select(functional_domain, statistic = rOS,se=rOS_SE, stat)
e <- phewas_meta_rss_liab_all %>% select(functional_domain, statistic = rSS,se=rSS_SE, stat)

f <- phewas_meta_rsib_SS_liab_all %>% select(functional_domain, statistic = rsibSS, se=rsibSS_SE, stat)
g <- phewas_meta_rsib_OS_liab_all %>% select(functional_domain, statistic = rsibOS,se=rsibOS_SE, stat)



z_meta_all <- a %>% bind_rows(.,b,d,e,f,g)

rm(a,b,c,d,e,g,f)
## Warning in rm(a, b, c, d, e, g, f): object 'c' not found
z_meta_all$stat <- factor(z_meta_all$stat, levels=c('rtwinOS','rsibOS','rtwinSS','rsibSS', 'h2','c2'))


z_meta_all$functional_domain <- factor(z_meta_all$functional_domain, 
                                                 levels = c('All Traits',
                                                            'Aging',
                                                            'Cardiovascular',
                                                            'Cognitive',
                                                            'Connective tissue',
                                                            'Dermatological',
                                                            'Developmental',
                                                            'Ear,Nose,Throat',
                                                            'Endocrine',
                                                            'Environment',
                                                            'Gastrointestinal',
                                                            'Hematological',
                                                            'Immunological',
                                                            'Infection',
                                                            'Metabolic',
                                                            'Neoplasms',
                                                            'Neurological',
                                                            'Ophthalmological',
                                                            'Psychiatric',
                                                            'Reproduction',
                                                            'Respiratory',
                                                            'Skeletal',
                                                            'Quantitative',
                                                            'Avg. Monthly Cost',
                                                            'PheWAS Comorbids'))



barchart_fn_domain <- ggplot(z_meta_all, aes(functional_domain, statistic, fill=stat)) + 
  geom_bar(stat="identity", position="dodge", width = 0.75, colour="black") +
  theme(axis.text.x=element_text(angle=45, size = 8),
        axis.title.x = element_text(vjust=10)) +
ylab('Statistic Value') + xlab('Functional Domain') + labs(fill='Statistic') +
  guides(colour = guide_legend(nrow = 1))


barchart_fn_domain

ggsave(barchart_fn_domain, file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/barchart_fn_domain.png')
## Saving 7 x 5 in image
braden_df_meta_analytic_estimates_noenvironment <- z_meta_all

Other Plots

Marginal Plots / Scatter plots

allData_meta_all <- allphewas_both %>% 
  mutate(diff_rss_ros = rliabss-rliabos) %>% mutate(h2_upper = 2*rliabos) %>%
  mutate(rOS=rliabos, rSS = rliabss)

scatter_h2_c2 <-  ggplot(allData_meta_all, aes(h2, c2)) + 
  geom_point(aes(colour = c('#008FD5'))) +  
  xlab('h2') +
  ylab('c2') + 
  theme(legend.position="none")  +
  geom_text(aes(label=ifelse(phewas_code == '984' |  phewas_code == '313.1' |
                               phewas_code == '134577' | 
                               phewas_code == '495' | 
                               phewas_code == '637' | 
                               phewas_code == '464' 
                               ,shortName,'')),hjust=0,vjust=0,fontface = "bold", size = 2)

scatter_h2_c2
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_text).
scatter_h2_c2 <- ggMarginal(scatter_h2_c2)
## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing missing values (geom_text).
scatter_h2_c2

ggsave(scatter_h2_c2, file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/scatter_h2_c2.png', width = 5, height = 3)





#scatter_h2_e2 <-  ggplot(allData_meta_all, aes(h2, e2)) + 
#  geom_point(aes(colour = c('#008FD5'))) +  
#  xlab('h2') +
#  ylab('e2') + 
#  theme(legend.position="none")  +
#    geom_text(aes(label=ifelse(phewas_code == '313.1' | phewas_code == '134577' | phewas_code == '495' | 
#                                 phewas_code == '871.1' |  phewas_code == '800.3' | phewas_code =='726.3'
#                               ,shortName,'')),hjust=0,vjust=0,fontface = "bold", size = 2)



  
#scatter_h2_e2 <- ggMarginal(scatter_h2_e2)


scatter_h2_h2_upper <-  ggplot(allData_meta_all, aes(h2, h2_upper)) + 
  geom_point(aes(colour = c('#008FD5'))) +  
  xlab('h2') +
  ylab('h2_upper (2*rOS)') + 
  theme(legend.position="none") +
    geom_text(aes(label=ifelse(phewas_code == '984' |  phewas_code == '313.1' |
                               phewas_code == '134577' | 
                               phewas_code == '495' | 
                               phewas_code == '637' | 
                               phewas_code == '464' 
                               ,shortName,'')),hjust=0,vjust=0,fontface = "bold", size = 2)



  
scatter_h2_h2_upper <- ggMarginal(scatter_h2_h2_upper)
## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing missing values (geom_text).
scatter_ros_rss <-  ggplot(allData_meta_all, aes(rOS, rSS)) + 
  geom_point(aes(colour = c('#008FD5'))) +  
  xlab('rtwinOS') +
  ylab('rtwinSS') + 
  theme(legend.position="none") +
      geom_text(aes(label=ifelse(phewas_code == '362.1' | phewas_code == '464' | 
                                   phewas_code == '134577' | phewas_code == '696.4' | phewas_code == '313.1'  |
                                   phewas_code == '871.1',
                                 shortName,'')),hjust=0,vjust=0,fontface = "bold", size = 2, nudge_x=-.01)



  
scatter_ros_rss <- ggMarginal(scatter_ros_rss) 
## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing missing values (geom_text).
ggsave(scatter_ros_rss, file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/scatter_ros_rss.png',width = 5, height = 3)




scatter_h2_c2_ros_rss <- ggarrange(scatter_h2_c2,scatter_h2_h2_upper ,scatter_ros_rss, 
          labels = c("a)", "b)", "c)"),
          ncol = 2, nrow = 2)

scatter_h2_c2_ros_rss

ggsave(scatter_h2_c2_ros_rss, file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/scatter_h2_c2_ros_rss.png')
## Saving 7 x 5 in image

Cost CDF/ Count CDF/h2,c2,ros,rss of cost and cnt

cost_comorbid_pheno <- allphewas_both %>% 
  dplyr::filter(phewas_code == 'COST' | phewas_code == 'CNT')

cost_comorbid_pheno$Description <- c('Number PheWAS Comorbidities','Avg. Monthly Cost')


cost_comorbid_pheno <- cost_comorbid_pheno %>% select(phewas_code, Description, rss01=rliabss,rliabss_SE, ros01=rliabos,rliabos_SE,h2_liab, h2_liab_SE, c2_liab, c2_liab_SE)

cost_comorbid_pheno_rss <- cost_comorbid_pheno[,c('phewas_code', 'Description','rss01','rliabss_SE')]  %>% 
  mutate(statisticType = 'rSS')

cost_comorbid_pheno_ros <- cost_comorbid_pheno %>% 
  select(phewas_code, Description,ros01,rliabos_SE) %>%
  mutate(statisticType = 'rOS')

cost_comorbid_pheno_h2 <- cost_comorbid_pheno %>% 
  select(phewas_code, Description,h2_liab,h2_liab_SE) %>%
  mutate(statisticType = 'h2')

cost_comorbid_pheno_c2 <- cost_comorbid_pheno %>% 
  select(phewas_code, Description,c2_liab,c2_liab_SE) %>%
  mutate(statisticType = 'c2')

names(cost_comorbid_pheno_rss) <- c('phewas_code','Description','statistic','standard_error','statisticType' )
names(cost_comorbid_pheno_ros) <- c('phewas_code','Description','statistic','standard_error','statisticType')
names(cost_comorbid_pheno_h2) <- c('phewas_code','Description','statistic','standard_error','statisticType')
names(cost_comorbid_pheno_c2) <- c('phewas_code','Description','statistic','standard_error','statisticType')

cost_comorbid_pheno_long <- cost_comorbid_pheno_rss %>% 
  rbind(.,cost_comorbid_pheno_ros) %>% 
  rbind(.,cost_comorbid_pheno_h2) %>%
  rbind(.,cost_comorbid_pheno_c2)


cost_comorbid_ggplot <- ggplot(cost_comorbid_pheno_long, aes(y=statistic, x=as.factor(Description), colour=statisticType)) + 
  geom_point(position = position_dodge(width = .5)) +  
  geom_errorbar(aes(ymin=statistic-1.96*standard_error, ymax=statistic+1.96*standard_error),position = position_dodge(width = 0.5)) + 
  theme(axis.text=element_text(size=8))  + 
  scale_x_discrete(labels = function(x) str_wrap(x, width = 80)) +
  theme(axis.title = element_text()) + ylab('Statistic') + xlab('Phenotype') + labs(color='Statistic Type')

cost_comorbid_ggplot

ggsave(cost_comorbid_ggplot, file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/cost_comorbid_ggplot.png')
## Saving 7 x 5 in image
costDistribution <- read_csv("~/Dropbox (HMS)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/manuscript_data/costDistribution.csv")
## Parsed with column specification:
## cols(
##   cost = col_double()
## )
comorbidDistribution <- read_csv("~/Dropbox (HMS)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/manuscript_data/comorbidDistribution.csv")
## Parsed with column specification:
## cols(
##   comorbids = col_integer()
## )
costggplotCDF <- ggplot(costDistribution %>% filter(cost >=0), aes(cost )) + stat_ecdf(geom = "step", pad = 'FALSE') + 
  ylab('Percentage of Twins') + xlab('Average Monthly Cost')


comorbidggplotCDF <- ggplot(comorbidDistribution, aes(comorbids )) + stat_ecdf(geom = "step", pad = 'FALSE') +
  ylab('Percentage of Twins') + xlab('Number of Comorbids')




cdf_cost_comorbid <- ggarrange(costggplotCDF,comorbidggplotCDF,labels = c( "a)", 'b)'))

cdf_cost_comorbid_bar_chart <- ggarrange(cdf_cost_comorbid,cost_comorbid_ggplot,labels = c( "c)", ''), nrow=2)

cdf_cost_comorbid_bar_chart

ggsave(cdf_cost_comorbid_bar_chart,file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/cdf_cost_comorbid_bar_chart.png', height = 8, width=8)


cost_comorbid_pheno_table <- cost_comorbid_pheno %>% 
  mutate(rss_low=rss01-1.96*rliabss_SE,rss_high=rss01+1.96*rliabss_SE ) %>%
  mutate(ros_low=ros01-1.96*rliabos_SE,ros_high=ros01+1.96*rliabos_SE ) %>%
  mutate(h2_low=h2_liab-1.96*h2_liab_SE,h2_high=h2_liab+1.96*h2_liab_SE ) %>%
  mutate(c2_low=c2_liab-1.96*c2_liab_SE,c2_high=c2_liab+1.96*c2_liab_SE ) %>%
select(phewas_code, Description,rss01, rss_low,rss_high, ros01, ros_low, ros_high, h2_liab, h2_low, h2_high,c2_liab, c2_low, c2_high)

DT::datatable(round_df(cost_comorbid_pheno_table, digits=3))

Twin Year of Birth Analysis

twinPairsYOB <- read_delim("~/Dropbox (HMS)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/manuscript_data/twinPairsYOB.csv", 
    "\t", escape_double = FALSE, col_types = cols(phewas_code = col_character()), 
    trim_ws = TRUE)


twinPairsYOB <- twinPairsYOB %>% inner_join(.,quant_pheno_description, by='phewas_code')


twinPairsYOB <- twinPairsYOB %>% mutate(Description = ifelse(phewas_code == 'COST','Binary Phenotypes', Description))

ggplot_ageDistribution <- ggplot(twinPairsYOB, aes(YOB, fill = Description, colour = Description)) + 
  geom_density(alpha = 0.1) + facet_wrap(~Description) +
  theme(axis.text.x=element_text(angle=45, size = 8),
        axis.title.x = element_text(vjust=2)) +
ylab('Density') + xlab('Year of Birth') +
theme(legend.position="none")

  


ggplot_ageDistribution

ggsave(ggplot_ageDistribution, file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/ggplot_ageDistribution.png')
## Saving 7 x 5 in image

Volcano Plot

thresh_h2 <- allphewas_both_zipcode %>% filter(pvalue_h2_FDR <= 0.05) %>% summarise(val=min(observed_h2))
thresh_c2 <- allphewas_both_zipcode %>% filter(pvalue_c2_FDR <= 0.05) %>% summarise(val=min(observed_c2))
thresh_e2 <- allphewas_both_zipcode %>% filter(e2.pvalue_FDR <= 0.05) %>% summarise(val=min(observed_e2))

thresh_rliabincome <- allphewas_both_zipcode %>% 
  filter(rliabincome.pvalue_FDR <= 0.05) %>% summarise(val=min(observed_rliabincome))

thresh_rliabaqi <- allphewas_both_zipcode %>% 
  filter(rliabaqi.pvalue_FDR <= 0.05) %>% summarise(val=min(observed_rliabaqi))

thresh_rliabtemp <- allphewas_both_zipcode %>% 
  filter(rliabtemp.pvalue_FDR <= 0.05) %>% summarise(val=min(observed_rliabtemp))



fullData_binary_quant <- allphewas_both_zipcode %>% 
  mutate(twinPrevalenceCategory = as.character(cut(prevalence, breaks=c(0,.10,.30,.80),include.lowest=TRUE,right=FALSE))) %>%
  replace_na(list(twinPrevalenceCategory='Quantitative Trait')) %>%
  mutate(h2_rank = rank(-h2_liab.zscore ,ties.method='random')) %>%
  mutate(c2_rank = rank(-c2_liab.zscore,ties.method='random')) %>%
  mutate(e2_rank = rank(-e2.zscore,ties.method='random')) %>%
  mutate(rliabincome_rank = rank(-rliabincome.zscore,ties.method='random')) %>%
  mutate(rliabaqi_rank = rank(-rliabaqi.zscore,ties.method='random')) %>%
  mutate(rliabtemp_rank = rank(-rliabtemp.zscore,ties.method='random')) %>%
  mutate(h2_rank_effect = rank(-h2_liab ,ties.method='random')) %>%
  mutate(c2_rank_effect = rank(-c2_liab,ties.method='random')) %>%
  mutate(e2_rank_effect = rank(-e2,ties.method='random')) %>%
  mutate(rliabincome_rank_effect = rank(-rliabincome,ties.method='random')) %>%
  mutate(rliabaqi_rank_effect = rank(-rliabaqi,ties.method='random')) %>%
  mutate(rliabtemp_rank_effect = rank(-rliabtemp,ties.method='random')) %>%
  mutate(h2_label = ifelse(h2_rank <= 5 | h2_rank_effect <= 2,shortName,'')) %>%
  mutate(c2_label = ifelse(c2_rank <= 5 |c2_rank_effect <= 2  ,shortName,'')) %>%
  mutate(rliabincome_label = ifelse(rliabincome_rank <= 5 | rliabincome_rank_effect <= 2,shortName,'')) %>%
  mutate(rliabaqi_label = ifelse(rliabaqi_rank <= 5 | rliabaqi_rank_effect <= 2 ,shortName,'')) %>%
  mutate(rliabtemp_label = ifelse(rliabtemp_rank <= 5 | rliabtemp_rank_effect <= 2, shortName,'')) 





unique(fullData_binary_quant$twinPrevalenceCategory)
## [1] "[0,0.1)"            "[0.1,0.3)"          "[0.3,0.8]"         
## [4] "Quantitative Trait"
fullData_binary_quant$twinPrevalenceCategory <-factor(fullData_binary_quant$twinPrevalenceCategory, 
                                                 levels = c('[0,0.1)','[0.1,0.3)','[0.3,0.8]','Quantitative Trait'))





alphaLevel <- .6

library(RColorBrewer)

myColors <- c('lightsalmon1','blue','green', 'yellow2')
names(myColors) <- levels(fullData_binary_quant$twinPrevalenceCategory)



p1 <-  ggplot(fullData_binary_quant, aes(h2_liab,observed_h2, label=h2_label)) + 
  geom_point( alpha =alphaLevel, aes(colour = twinPrevalenceCategory))  + 
  xlab('Heritability') + ylab('-log10(p)') +
  geom_hline(yintercept=thresh_h2$val,colour="#BB0000", linetype="dashed") +     
    scale_colour_manual(name = "Prevalence",values = myColors) + theme(legend.position="none") +
  geom_text_repel( fontface = "bold", size = 3) 









p2 <-  ggplot(fullData_binary_quant, aes(c2_liab,observed_c2, label=c2_label)) + 
  geom_point( alpha =alphaLevel, aes(colour = twinPrevalenceCategory))  + 
  xlab('Shared Environmental Variance') + ylab('-log10(p)') +
  geom_hline(yintercept=thresh_h2$val,colour="#BB0000", linetype="dashed") +     
    scale_colour_manual(name = "Prevalence",values = myColors) + theme(legend.position="none") +
  geom_text_repel( fontface = "bold", size = 3) 







#p3 <-  ggplot(fullData_binary_quant, aes(e2,observed_e2)) + 
#  geom_point( alpha =alphaLevel, aes(colour = twinPrevalenceCategory)) + 
#  xlab('Residual Variance') + ylab('-log10(p)') +
#  geom_hline(yintercept=thresh_h2$val,colour="#BB0000", linetype="dashed")  +     
#    scale_colour_manual(name = "Prevalence",values = myColors) + theme(legend.position="none") +
#  geom_text(position='jitter',aes(label=ifelse(( e2_rank ==1  | e2_rank == 2 |  
 #                                                  e2_rank ==5 | e2_rank==7  ),shortName,'')),hjust=1,vjust=1,fontface = "bold", size = 3) +
#      geom_text(position='jitter',aes(label=ifelse((e2_rank == 3 ),shortName,'')),hjust=0,vjust=1,fontface = "bold", size = 3) 





p4 <-  ggplot(fullData_binary_quant, aes(rliabincome,observed_rliabincome, label=rliabincome_label) ) + 
  geom_point( alpha =alphaLevel, aes(colour = twinPrevalenceCategory))  + 
  xlab('Socioeconomic Status Variance Component') + ylab('-log10(p)') +
  geom_hline(yintercept=thresh_h2$val,colour="#BB0000", linetype="dashed") +     
    scale_colour_manual(name = "Prevalence",values = myColors) + theme(legend.position="none") +
  geom_text_repel( fontface = "bold", size = 3) 



p5 <-  ggplot(fullData_binary_quant, aes(rliabaqi,observed_rliabaqi, label=rliabaqi_label) ) + 
  geom_point( alpha =alphaLevel, aes(colour = twinPrevalenceCategory))  + 
  geom_hline(yintercept=thresh_h2$val,colour="#BB0000", linetype="dashed") +     
    scale_colour_manual(name = "Prevalence",values = myColors) + theme(legend.position="none") +
xlab('Monthly AQI Variance Component') + ylab('-log10(p)') +
    geom_text_repel( fontface = "bold", size = 3) 




p6 <-  ggplot(fullData_binary_quant, aes(rliabtemp,observed_rliabtemp, label=rliabtemp_label) ) + 
  geom_point( alpha =alphaLevel, aes(colour = twinPrevalenceCategory))  + 
  xlab('Monthly Temperature Variance Component') + ylab('-log10(p)') +
  geom_hline(yintercept=thresh_h2$val,colour="#BB0000", linetype="dashed") +     
    scale_colour_manual(name = "Prevalence",values = myColors) + theme(legend.position="none") +
  geom_text_repel( fontface = "bold", size = 3) 




volcano_plot <- ggarrange(p1,p2, p4 , p5, p6, labels = c( 'b)', 'c)','d)', 'e)' ,'f)' ), common.legend = TRUE, nrow=3,ncol = 2, legend  = 'bottom')
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_text_repel).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_text_repel).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_text_repel).
## Warning: Removed 4 rows containing missing values (geom_point).
## Warning: Removed 4 rows containing missing values (geom_text_repel).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_text_repel).
#barchart_fn_domain

#volcano_bar_chart <- ggarrange(barchart_fn_domain,volcano_plot, labels=c('a)',''), nrow = 2,heights   = 1:2)

#volcano_bar_chart

#ggsave(volcano_bar_chart,file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/volcano_bar_chart.png', height = 12, width = 10)
df_tttt <- fullData_binary_quant %>% select(phewas_code, phewas_description, rliabincome,rliabincome_SE, rliabincome.pvalue, rliabincome.pvalue_FDR, rliabincome.zscore, rliabincome_rank)

Figures

Figure 1

Generate Mecklenburg Time Series

daily_aqi_by_county_2007 <- read_csv("~/Dropbox (HMS)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/manuscript_data/aqi_data/daily_aqi_by_county_2007.csv") 
## Parsed with column specification:
## cols(
##   `State Name` = col_character(),
##   `county Name` = col_character(),
##   `State Code` = col_character(),
##   `County Code` = col_character(),
##   Date = col_date(format = ""),
##   AQI = col_integer(),
##   Category = col_character(),
##   `Defining Parameter` = col_character(),
##   `Defining Site` = col_character(),
##   `Number of Sites Reporting` = col_integer()
## )
daily_aqi_by_county_2008 <- read_csv("~/Dropbox (HMS)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/manuscript_data/aqi_data/daily_aqi_by_county_2008.csv") 
## Parsed with column specification:
## cols(
##   `State Name` = col_character(),
##   `county Name` = col_character(),
##   `State Code` = col_character(),
##   `County Code` = col_character(),
##   Date = col_date(format = ""),
##   AQI = col_integer(),
##   Category = col_character(),
##   `Defining Parameter` = col_character(),
##   `Defining Site` = col_character(),
##   `Number of Sites Reporting` = col_integer()
## )
daily_aqi_by_county_2009 <- read_csv("~/Dropbox (HMS)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/manuscript_data/aqi_data/daily_aqi_by_county_2009.csv") 
## Parsed with column specification:
## cols(
##   `State Name` = col_character(),
##   `county Name` = col_character(),
##   `State Code` = col_character(),
##   `County Code` = col_character(),
##   Date = col_date(format = ""),
##   AQI = col_integer(),
##   Category = col_character(),
##   `Defining Parameter` = col_character(),
##   `Defining Site` = col_character(),
##   `Number of Sites Reporting` = col_integer()
## )
daily_aqi_by_county_2010 <- read_csv("~/Dropbox (HMS)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/manuscript_data/aqi_data/daily_aqi_by_county_2010.csv") 
## Parsed with column specification:
## cols(
##   `State Name` = col_character(),
##   `county Name` = col_character(),
##   `State Code` = col_character(),
##   `County Code` = col_character(),
##   Date = col_date(format = ""),
##   AQI = col_integer(),
##   Category = col_character(),
##   `Defining Parameter` = col_character(),
##   `Defining Site` = col_character(),
##   `Number of Sites Reporting` = col_integer()
## )
daily_aqi_by_county_2011 <- read_csv("~/Dropbox (HMS)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/manuscript_data/aqi_data/daily_aqi_by_county_2011.csv")
## Parsed with column specification:
## cols(
##   `State Name` = col_character(),
##   `county Name` = col_character(),
##   `State Code` = col_character(),
##   `County Code` = col_character(),
##   Date = col_date(format = ""),
##   AQI = col_integer(),
##   Category = col_character(),
##   `Defining Parameter` = col_character(),
##   `Defining Site` = col_character(),
##   `Number of Sites Reporting` = col_integer()
## )
daily_aqi_by_county_2012 <- read_csv("~/Dropbox (HMS)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/manuscript_data/aqi_data/daily_aqi_by_county_2012.csv") 
## Parsed with column specification:
## cols(
##   `State Name` = col_character(),
##   `county Name` = col_character(),
##   `State Code` = col_character(),
##   `County Code` = col_character(),
##   Date = col_date(format = ""),
##   AQI = col_integer(),
##   Category = col_character(),
##   `Defining Parameter` = col_character(),
##   `Defining Site` = col_character(),
##   `Number of Sites Reporting` = col_integer()
## )
daily_aqi_by_county_2013 <- read_csv("~/Dropbox (HMS)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/manuscript_data/aqi_data/daily_aqi_by_county_2013.csv") 
## Parsed with column specification:
## cols(
##   `State Name` = col_character(),
##   `county Name` = col_character(),
##   `State Code` = col_character(),
##   `County Code` = col_character(),
##   Date = col_date(format = ""),
##   AQI = col_integer(),
##   Category = col_character(),
##   `Defining Parameter` = col_character(),
##   `Defining Site` = col_character(),
##   `Number of Sites Reporting` = col_integer()
## )
daily_aqi_by_county_2014 <- read_csv("~/Dropbox (HMS)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/manuscript_data/aqi_data/daily_aqi_by_county_2014.csv") 
## Parsed with column specification:
## cols(
##   `State Name` = col_character(),
##   `county Name` = col_character(),
##   `State Code` = col_character(),
##   `County Code` = col_character(),
##   Date = col_date(format = ""),
##   AQI = col_integer(),
##   Category = col_character(),
##   `Defining Parameter` = col_character(),
##   `Defining Site` = col_character(),
##   `Number of Sites Reporting` = col_integer()
## )
daily_aqi_by_county_2015 <- read_csv("~/Dropbox (HMS)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/manuscript_data/aqi_data/daily_aqi_by_county_2015.csv") 
## Parsed with column specification:
## cols(
##   `State Name` = col_character(),
##   `county Name` = col_character(),
##   `State Code` = col_character(),
##   `County Code` = col_character(),
##   Date = col_date(format = ""),
##   AQI = col_integer(),
##   Category = col_character(),
##   `Defining Parameter` = col_character(),
##   `Defining Site` = col_character(),
##   `Number of Sites Reporting` = col_integer()
## )
daily_aqi_by_county_2016 <- read_csv("~/Dropbox (HMS)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/manuscript_data/aqi_data/daily_aqi_by_county_2016.csv") 
## Parsed with column specification:
## cols(
##   `State Name` = col_character(),
##   `county Name` = col_character(),
##   `State Code` = col_character(),
##   `County Code` = col_character(),
##   Date = col_date(format = ""),
##   AQI = col_integer(),
##   Category = col_character(),
##   `Defining Parameter` = col_character(),
##   `Defining Site` = col_character(),
##   `Number of Sites Reporting` = col_integer()
## )
aqi_monthly <- daily_aqi_by_county_2007 %>% bind_rows(.,daily_aqi_by_county_2008,daily_aqi_by_county_2009,
                                                      daily_aqi_by_county_2010,daily_aqi_by_county_2011,
                                                      daily_aqi_by_county_2012,daily_aqi_by_county_2013,
                                                      daily_aqi_by_county_2014,daily_aqi_by_county_2015,
                                                      daily_aqi_by_county_2016) %>%
  select(state=`State Name`, county=`county Name`, county_code=`County Code`, Date, AQI) 



mecklenburg_aqi <- aqi_monthly %>% filter(state=='North Carolina', county == 'Mecklenburg')



mecklenburg_aqi$Date[366]
## [1] "2008-01-01"
as.numeric(mecklenburg_aqi$Date[366])
## [1] 13879
mecklenburg_aqi$Date[2557]
## [1] "2013-12-31"
as.numeric(mecklenburg_aqi$Date[2557])
## [1] 16070
mecklenburg_noaa_sensor <- read_csv("~/Dropbox (HMS)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/analysis/var_comp_zipcode_analysis/mecklenburg_noaa_sensor.csv", 
                                    col_types = cols(`?column?` = col_double(), 
                                                     startdate = col_character()))
## Warning in rbind(names(probs), probs_f): number of columns of result is not
## a multiple of vector length (arg 1)
## Warning: 3 parsing failures.
## row # A tibble: 3 x 5 col     row col     expected actual  file                                      expected   <int> <chr>   <chr>    <chr>   <chr>                                     actual 1     3 ?colum… a double "\" \"" '~/Dropbox (HMS)/RagGroup Team Folder/ge… file 2     4 ?colum… a double "\" \"" '~/Dropbox (HMS)/RagGroup Team Folder/ge… row 3    87 ?colum… a double "\" \"" '~/Dropbox (HMS)/RagGroup Team Folder/ge…
names(mecklenburg_noaa_sensor) <- c('startdate','Temperature')

mecklenburg_noaa_sensor <- mecklenburg_noaa_sensor %>%
mutate(Date=substring(startdate,0,10)) %>% mutate(Date=parse_datetime(Date, format='%F'))



mecklenburg_noaa_sensor$Date[9]
## [1] "2008-01-01 UTC"
as.numeric(mecklenburg_noaa_sensor$Date[9])
## [1] 1199145600
mecklenburg_noaa_sensor$Date[63]
## [1] "2014-01-01 UTC"
as.numeric(mecklenburg_noaa_sensor$Date[63])
## [1] 1388534400
axis_text <- element_text(face = "bold", color = "black", size = 12)





library(readr)
ACS_13_5YR_S1901_with_ann <- read_csv("~/Dropbox (HMS)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/analysis/var_comp_zipcode_analysis/ACS_13_5YR_S1901/ACS_13_5YR_S1901_with_ann.csv", 
                                      skip = 1)
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   Id = col_character(),
##   Id2 = col_integer(),
##   Geography = col_character(),
##   `Households; Estimate; Total` = col_integer(),
##   `Households; Margin of Error; Total` = col_integer(),
##   `Families; Estimate; Total` = col_integer(),
##   `Families; Margin of Error; Total` = col_integer(),
##   `Married-couple families; Estimate; Total` = col_integer(),
##   `Married-couple families; Margin of Error; Total` = col_integer(),
##   `Nonfamily households; Estimate; Total` = col_integer(),
##   `Nonfamily households; Margin of Error; Total` = col_integer(),
##   `Households; Estimate; Median income (dollars)` = col_integer(),
##   `Households; Margin of Error; Median income (dollars)` = col_integer(),
##   `Families; Estimate; Median income (dollars)` = col_integer(),
##   `Families; Margin of Error; Median income (dollars)` = col_integer(),
##   `Married-couple families; Estimate; Median income (dollars)` = col_integer(),
##   `Married-couple families; Margin of Error; Median income (dollars)` = col_integer(),
##   `Nonfamily households; Estimate; Median income (dollars)` = col_integer(),
##   `Nonfamily households; Margin of Error; Median income (dollars)` = col_integer(),
##   `Households; Estimate; Mean income (dollars)` = col_integer()
##   # ... with 28 more columns
## )
## See spec(...) for full column specifications.
income_distribution <- ACS_13_5YR_S1901_with_ann %>% 
  select(Id2, starts_with('Families;')) %>%
  select(Id2, contains('Estimate;')) %>%
  select(-contains('PERCENT IMPUTED')) %>%
  gather(estimate, percentage, 3:12) %>%
  separate(estimate,c('fam','est','bracket'), sep = ';') %>% select(-fam,-est) %>%
  mutate(bracket = trimws(bracket, which=c('both'))) 




income_distribution$bracket <- factor(income_distribution$bracket, 
                                       levels = c('Less than $10,000',
                                                  '$10,000 to $14,999',
                                                  '$15,000 to $24,999',
                                                  '$25,000 to $34,999',
                                                  '$35,000 to $49,999',
                                                  '$50,000 to $74,999',
                                                  '$75,000 to $99,999',
                                                  '$100,000 to $149,999',
                                                  '$150,000 to $199,999',
                                                  '$200,000 or more'))


myLoc <- 
  (which(levels(income_distribution$bracket) == '$50,000 to $74,999') +
     which(levels(income_distribution$bracket) == '$75,000 to $99,999')) / 2


size_text <- 6


income_plot <-  ggplot(income_distribution, aes(x=bracket, y=percentage)) + geom_col(fill='dark green') +
  xlab('Income Bracket') + ylab('Percentage') +
  geom_vline(aes(xintercept = myLoc), size=2) +
    theme(axis.text.x = element_text(angle = 20, size=size_text,hjust = 1), 
        axis.text.y = element_text(size=size_text),
        axis.title.x = element_text(size=size_text),
        axis.title.y = element_text(size=size_text)) + 
    theme(legend.text=element_text(size=size_text)) 
  

income_plot

ts_aqi <- ggplot(mecklenburg_aqi, aes(x = Date, y = AQI)) + 
  geom_line(color='blue') +
  xlab('Date') + ylab('Daily AQI') +
  geom_vline(xintercept =13879, size=2) +
  geom_vline(xintercept =16070, size=2) +
    theme(axis.text.x = element_text(angle = 0, size=size_text), 
        axis.text.y = element_text(size=size_text),
        axis.title.x = element_text(size=size_text),
        axis.title.y = element_text(size=size_text)) + 
    theme(legend.text=element_text(size=size_text)) 






ts_temperature <- ggplot(mecklenburg_noaa_sensor, aes(x = Date, y = Temperature)) + 
  geom_line(color='red') +
  geom_vline(xintercept =as.numeric(mecklenburg_noaa_sensor$Date[63]), size=2) +
  geom_vline(xintercept =as.numeric(mecklenburg_noaa_sensor$Date[9]), size=2) +
xlab('Date') + ylab('Average Monthly Temp') +
    theme(axis.text.x = element_text(angle = 0, size=size_text), 
        axis.text.y = element_text(size=size_text),
        axis.title.x = element_text(size=size_text),
        axis.title.y = element_text(size=size_text)) + 
    theme(legend.text=element_text(size=size_text)) 



ts_both <- ggarrange(ts_aqi, ts_temperature, income_plot, labels=c('d)', 'e)', 'f)'), nrow=3,
                     font.label = list(size = 8))
## Warning: Removed 1 rows containing missing values (geom_path).
ts_both

library(magick)
## Linking to ImageMagick 6.9.9.39
## Enabled features: cairo, fontconfig, freetype, lcms, pango, rsvg, webp
## Disabled features: fftw, ghostscript, x11
meck_diagram <- image_read('~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/mecklenburg_grapple_image_v2.png')


meck_diagram_ggplot <- image_ggplot(meck_diagram)



ts_both_picture <- ggarrange(ts_both,meck_diagram_ggplot, labels=c('', 'g)'), ncol=2, font.label = list(size = 8))


#ggsave(ts_temperature,file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/mecklenburg_ts_noaa.png')
#ggsave(ts_aqi,file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/mecklenburg_ts_AQI.png')

#ggsave(ts_both_picture, file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/figure1b.png')

#ts_both_picture

#ggsave(ts_both, file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/figure1b_left.png')


ts_both_picture

#ggsave(ts_both,file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/mecklenburg_data.png', height=12, width=8)
map_prevalence_data <- readRDS(all,file = '~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/maps/map_prevalence_data.rds')

pp <- ggplot() +
  geom_polygon(data = map_prevalence_data, 
               aes(x = long, y = lat, group = group, fill = numPairs), 
               color = "black", size = 0.25) + 
  coord_map() +
  labs(fill = "Number of Twin Pairs") + xlim(-140,-66) + theme_map() +
  theme(legend.text=element_text(size=size_text),
        legend.title = element_text(size=size_text),
        legend.key.size = unit(.25,'cm')) 

pp

df_Pop <- readRDS(file='~/Dropbox (HMS)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/revised_analysis/SES_Analysis/popDensity_twin_ACS.rds')

df_SES <- readRDS(file='~/Dropbox (HMS)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/revised_analysis/SES_Analysis/SES_twin_ACS.rds')



popDensityPlot <- ggplot(df_Pop, aes(x=densityBin, y=normalizedPop)) + 
  geom_bar(stat='identity', aes(fill = cohort))  + 
  facet_wrap(~cohort,nrow = 1) +
  theme(axis.text.x = element_text(angle = 60, size=1), 
        axis.text.y = element_text(size=size_text),
        axis.title.x = element_text(size=size_text),
        axis.title.y = element_text(size=size_text),
        strip.text = element_text(size=size_text)) + 
  theme(legend.text=element_text(size=size_text)) +
  xlab("Log of Population Per Square Mile Area") + 
  ylab("% Total Population")




sesDensityPlot <- ggplot(df_SES, aes(x=SES_bin, y=normalizedPop)) + 
  geom_bar(stat='identity', aes(fill = cohort))  + 
  facet_wrap(~cohort,nrow = 1) +
  theme(axis.text.x = element_text(angle = 60, size=1), 
        axis.text.y = element_text(size=size_text),
        axis.title.x = element_text(size=size_text),
        axis.title.y = element_text(size=size_text),
        strip.text = element_text(size=size_text)) + 
    theme(legend.text=element_text(size_text)) +
  xlab("Depravity Index Score") + ylab("% Total Population") 




figure1a <- ggarrange(popDensityPlot,sesDensityPlot,labels = c( 'b)','c)'),  
                      common.legend=TRUE, ncol = 2, legend = 'none', align = 'hv',
                      font.label = list(size = 8))

figure1 <- ggarrange(pp,figure1a,labels = c( "a)", ''),  common.legend=FALSE, nrow = 2, ncol = 1,legend = 'top',
                     font.label = list(size = 8))

figure1

figure1_combined <- ggarrange(figure1,ts_both_picture, nrow=2,common.legend = FALSE)

figure1_combined

#save_plot(figure1_combined,file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/figure1_combined.png',
#                    base_aspect_ratio = 1.3 # make room for figure legend
#          )



#ggsave(figure1,file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/figure1a.png')


#save_plot(ts_both_picture,file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/figure1b.png')



ggsave(figure1_combined,file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/figure1.pdf', 
          height = 8.5, width=6.9, units='in', dpi = 'retina', device = 'pdf')


#ggsave(pp, file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/twin_map.png')


#ggsave(popDensityPlot, file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/popDensityPlot.png')

#ggsave(sesDensityPlot, file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/sesDensityPlot.png')

Figure 2

size_text <- 8



barchart_fn_domain <- ggplot(z_meta_all, aes(functional_domain, statistic, fill=stat)) + 
  geom_bar(stat="identity", position="dodge", width = 0.85, colour="black",size=.15) +
ylab('Statistic Value') + xlab('Functional Domain') + labs(fill='Statistic') +
    theme(axis.text.x = element_text(angle = 45, size=size_text, hjust = .9), 
        axis.text.y = element_text(size=size_text),
        axis.title.x = element_text(size=size_text, vjust=.1),
        axis.title.y = element_text(size=size_text)) + 
    theme(legend.text=element_text(size=size_text), legend.title = element_text(size=size_text)) +
    theme(legend.direction = "horizontal",legend.position = c(.35,.85)) +
    geom_errorbar(aes(ymin=statistic-1.96*se, 
                    ymax=statistic+1.96*se),
                width=.5,size=.2,
                position=position_dodge(.9))





barchart_fn_domain

alphaLevel <- .6

library(RColorBrewer)

myColors <- c('lightsalmon1','blue','green', 'yellow2')
names(myColors) <- levels(fullData_binary_quant$twinPrevalenceCategory)


labelSize <- 1.5

p1 <-  ggplot(fullData_binary_quant, aes(h2_liab,observed_h2, label=h2_label)) + 
  geom_point( alpha =alphaLevel, aes(colour = twinPrevalenceCategory))  + 
  xlab('Heritability') + ylab('-log10(p)') +
  geom_hline(yintercept=thresh_h2$val,colour="#BB0000", linetype="dashed") +     
    scale_colour_manual(name = "Prevalence",values = myColors) + theme(legend.position="none") +
  geom_text_repel( fontface = "bold", size = labelSize) +
    theme(axis.text.x = element_text(angle = 0, size=size_text, hjust = .75), 
        axis.text.y = element_text(size=size_text),
        axis.title.x = element_text(size=size_text, vjust=.1),
        axis.title.y = element_text(size=size_text)) + 
    theme(legend.text=element_text(size=size_text), legend.title = element_text(size=size_text)) 








p2 <-  ggplot(fullData_binary_quant, aes(c2_liab,observed_c2, label=c2_label)) + 
  geom_point( alpha =alphaLevel, aes(colour = twinPrevalenceCategory))  + 
  xlab('Shared Environmental Variance') + ylab('-log10(p)') +
  geom_hline(yintercept=thresh_h2$val,colour="#BB0000", linetype="dashed") +     
    scale_colour_manual(name = "Prevalence",values = myColors) + theme(legend.position="none") +
  geom_text_repel( fontface = "bold", size = labelSize) +
    theme(axis.text.x = element_text(angle = 0, size=size_text, hjust = .75), 
        axis.text.y = element_text(size=size_text),
        axis.title.x = element_text(size=size_text, vjust=.1),
        axis.title.y = element_text(size=size_text)) + 
    theme(legend.text=element_text(size=size_text), legend.title = element_text(size=size_text)) 







p4 <-  ggplot(fullData_binary_quant, aes(rliabincome,observed_rliabincome, label=rliabincome_label) ) + 
  geom_point( alpha =alphaLevel, aes(colour = twinPrevalenceCategory))  + 
  xlab('Socioeconomic Status Variance Component') + ylab('-log10(p)') +
  geom_hline(yintercept=thresh_h2$val,colour="#BB0000", linetype="dashed") +     
    scale_colour_manual(name = "Prevalence",values = myColors) + theme(legend.position="none") +
  geom_text_repel( fontface = "bold", size = labelSize) +
    theme(axis.text.x = element_text(angle = 0, size=size_text, hjust = .75), 
        axis.text.y = element_text(size=size_text),
        axis.title.x = element_text(size=size_text, vjust=.1),
        axis.title.y = element_text(size=size_text)) + 
    theme(legend.text=element_text(size=size_text), legend.title = element_text(size=size_text)) 





p5 <-  ggplot(fullData_binary_quant, aes(rliabaqi,observed_rliabaqi, label=rliabaqi_label) ) + 
  geom_point( alpha =alphaLevel, aes(colour = twinPrevalenceCategory))  + 
  geom_hline(yintercept=thresh_h2$val,colour="#BB0000", linetype="dashed") +     
    scale_colour_manual(name = "Prevalence",values = myColors) + theme(legend.position="none") +
xlab('Monthly AQI Variance Component') + ylab('-log10(p)') +
    geom_text_repel( fontface = "bold", size = labelSize) +
    theme(axis.text.x = element_text(angle = 0, size=size_text, hjust = .75), 
        axis.text.y = element_text(size=size_text),
        axis.title.x = element_text(size=size_text, vjust=.1),
        axis.title.y = element_text(size=size_text)) + 
    theme(legend.text=element_text(size=size_text), legend.title = element_text(size=size_text)) 






p6 <-  ggplot(fullData_binary_quant, aes(rliabtemp,observed_rliabtemp, label=rliabtemp_label) ) + 
  geom_point( alpha =alphaLevel, aes(colour = twinPrevalenceCategory))  + 
  xlab('Monthly Temperature Variance Component') + ylab('-log10(p)') +
  geom_hline(yintercept=thresh_h2$val,colour="#BB0000", linetype="dashed") +     
    scale_colour_manual(name = "Prevalence",values = myColors) + theme(legend.position="none") +
  geom_text_repel( fontface = "bold", size = labelSize) +
    theme(axis.text.x = element_text(angle = 0, size=size_text, hjust = .75), 
        axis.text.y = element_text(size=size_text),
        axis.title.x = element_text(size=size_text, vjust=.1),
        axis.title.y = element_text(size=size_text)) + 
    theme(legend.text=element_text(size=size_text), legend.title = element_text(size=size_text)) 






volcano_plot <- ggarrange(p1,p2, p4 , p5, p6, labels = c( 'b)', 'c)','d)', 'e)' ,'f)' ), common.legend = TRUE, nrow=3,ncol = 2, legend  = 'bottom',
                     font.label = list(size = size_text))
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_text_repel).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_text_repel).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_text_repel).
## Warning: Removed 4 rows containing missing values (geom_point).
## Warning: Removed 4 rows containing missing values (geom_text_repel).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_text_repel).
barchart_fn_domain

volcano_bar_chart <- ggarrange(barchart_fn_domain,volcano_plot, labels=c('a)',''), nrow = 2,heights   = 1:2,font.label = list(size = size_text))

volcano_bar_chart

ggsave(volcano_bar_chart,
       file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/figure2.pdf', 
       height = 8, width = 6.9, units='in', device = 'pdf', dpi = 'retina')

Figure 3

size_text <- 8

cost_comorbid_pheno_long <- cost_comorbid_pheno_long %>%
  mutate(statisticType = ifelse(statisticType == 'rSS', 'rtwinSS',statisticType )) %>%
  mutate(statisticType = ifelse(statisticType == 'rOS', 'rtwinOS',statisticType ))


cost_comorbid_ggplot <- ggplot(cost_comorbid_pheno_long, aes(x=as.factor(Description), y=statistic, fill=statisticType)) + 
  geom_bar(stat="identity", color="black", 
           position=position_dodge()) +
  geom_errorbar(aes(ymin=statistic-1.96*standard_error, 
                    ymax=statistic+1.96*standard_error),
                width=.4,
                position=position_dodge(.9)) +
    scale_x_discrete(labels = function(x) str_wrap(x, width = 15)) +
  coord_flip() +
  theme(axis.title = element_text()) + ylab('Statistic Value') + xlab('Phenotype') + labs(color='Statistic Type') +
  theme(legend.position="bottom") +
  theme(legend.text=element_text(size=size_text), legend.title = element_text(size=size_text)) +
    theme(axis.text=element_text(size=size_text))  +
      theme(axis.text.x = element_text(angle = 0, size=size_text, hjust = .75), 
        axis.text.y = element_text(size=size_text),
        axis.title.x = element_text(size=size_text, vjust=.1),
        axis.title.y = element_text(size=size_text)) 




cost_comorbid_ggplot

comparisonplot_publishedLit <- ggplot(twin_h2_c2_comparison_diff_twin_match , aes(h2_match,h2_liab)) + geom_point() + 
    geom_errorbarh(aes(xmin=h2_match-1.96*h2_match_SE, xmax=h2_match+1.96*h2_match_SE),
                   position = position_dodge(width = 0.01), alpha=.2) +
      geom_errorbar(aes(ymin=h2_liab-1.96*h2_liab_SE, ymax=h2_liab+1.96*h2_liab_SE),
                    position = position_dodge(width = 0.01), alpha=.2) + geom_abline(slope = 1, intercept = 0) +
    geom_smooth(method = "lm", se = TRUE) + xlab('Heritability - Published Studies') + ylab('Heritability - Insurance Study') +
      theme(legend.text=element_text(size=size_text), legend.title = element_text(size=size_text)) +
    theme(axis.text=element_text(size=size_text))  +
      theme(axis.text.x = element_text(angle = 0, size=size_text, hjust = .75), 
        axis.text.y = element_text(size=size_text),
        axis.title.x = element_text(size=size_text, vjust=.1),
        axis.title.y = element_text(size=size_text)) 






figure3 <- ggarrange(comparisonplot_publishedLit,cost_comorbid_ggplot,labels = c( "a)", 'b)'), nrow=2,ncol = 1,font.label = list(size = size_text))
## Warning: position_dodge requires non-overlapping x intervals
figure3

ggsave(figure3,file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/figure3.pdf',
height = 7, width = 6.9, device = 'pdf', dpi = 'retina')

Figure 4

size_text <- 8


h2_liab_ggplot <- ggplot(metaAll_justMatch_h2, aes(y=h2, x=functional_domain, colour=category)) + 
  geom_point(position = position_dodge(width = .5)) +  
  geom_errorbar(aes(ymin=h2-1.96*h2_SE, ymax=h2+1.96*h2_SE),position = position_dodge(width = 0.5)) + 
  coord_flip() +  
  theme(axis.text=element_text(size=size_text))  + 
  scale_x_discrete(labels = function(x) str_wrap(x, width = 80)) +
  theme(axis.title = element_text(size=size_text)) + ylab('Heritability (h2)') + xlab('Functional Domain') +
      theme(legend.text=element_text(size=size_text), legend.title = element_text(size=size_text)) 



#ggsave(h2_liab_ggplot,file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/h2_liab_comparison_match.png', height=3.5, width=6)


c2_liab_ggplot <- ggplot(metaAll_justMatch_c2, aes(y=c2, x=functional_domain, colour=category)) + 
  geom_point(position = position_dodge(width = .5)) +  
  geom_errorbar(aes(ymin=c2-1.96*c2_SE, ymax=c2+1.96*c2_SE),position = position_dodge(width = 0.5)) + 
  coord_flip() +     
  theme(axis.text=element_text(size=size_text))  + 
  scale_x_discrete(labels = function(x) str_wrap(x, width = 80)) +
  theme(axis.title = element_text(size=size_text)) + ylab('Shared Environmental Variance (c2)') + xlab('Functional Domain') +     theme(legend.text=element_text(size=size_text), legend.title = element_text(size=size_text)) 




#ggsave(c2_liab_ggplot,file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/c2_liab_comparison_match.png', height=3.5, width=6)



figure4 <- ggarrange(h2_liab_ggplot,c2_liab_ggplot,labels = c( "a)", 'b)'), nrow = 2, ncol = 1, common.legend=TRUE,legend='bottom',font.label = list(size = size_text) )
## Warning: Removed 3 rows containing missing values (geom_point).
## Warning: Removed 3 rows containing missing values (geom_errorbar).
## Warning: Removed 3 rows containing missing values (geom_point).
## Warning: Removed 3 rows containing missing values (geom_errorbar).
## Warning: Removed 3 rows containing missing values (geom_point).
## Warning: Removed 3 rows containing missing values (geom_errorbar).
#h2_c2_MaTCH
figure4

ggsave(figure4,
       file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/figure4.pdf', 
       height = 6, width=6, units = 'in', device='pdf', dpi='retina')

Supplementary Data

Supplementary Figure 1

size_text <- 3

plot_fig <- ggplot(prev, aes(x=functional_domain, y=prevalence)) +
  geom_boxplot(aes(colour=category), outlier.alpha = 0.4,size=.2,outlier.size = .3) + 
   scale_y_log10() +
 ylab('Prevalence') + xlab('Functional Domain') +
  theme(legend.position=c(-.15,-.1),legend.box = "horizontal") +     theme(axis.text=element_text(size=size_text))  +
      theme(axis.text.x = element_text(angle = 30, size=size_text, hjust = .75), 
        axis.text.y = element_text(size=size_text),
        axis.title.x = element_text(size=size_text,vjust=4),
        axis.title.y = element_text(size=size_text,vjust=-1)) +
  theme(legend.text=element_text(size=size_text), legend.title = element_text(size=size_text)) +
  theme(legend.key.size = unit(.3, "cm")) +
  theme(plot.margin=grid::unit(c(t=.2,r=.1,b=-1,l=0), "mm"))



plot_fig <- ggplot(prev, aes(x=functional_domain, y=prevalence)) +
  geom_boxplot(aes(colour=category), outlier.alpha = 0.4,size=.2,outlier.size = .3) + 
   scale_y_log10() +
 ylab('Prevalence') + xlab('Functional Domain') +
  theme(legend.position=c(-.15,-.1),legend.box = "horizontal") +     
  theme(axis.text=element_text(size=size_text))  +
      theme(axis.text.x = element_text(angle = 30, size=size_text, hjust = .75), 
        axis.text.y = element_text(size=size_text),
        axis.title.x = element_text(size=size_text,vjust=4),
        axis.title.y = element_text(size=size_text,vjust=-1)) +
  theme(legend.text=element_text(size=size_text), legend.title = element_text(size=size_text)) +
  theme(legend.key.size = unit(.3, "cm")) +
  theme(plot.margin=grid::unit(c(t=.2,r=.1,b=-1,l=0), "mm"))




plot_fig

ggsave(plot_fig, file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/supp_figure_1.jpg',dpi='print' , device = 'jpeg', width = 3,height= 2)

Supplementary Figure 2

size_text <- 6


correlation_sibOS_sibSS <- ggplot(allphewas_sibling_correlation , aes(rliabsibling_SS,rliabsibling_OS)) + geom_point(size=.03) + 
    geom_errorbarh(aes(xmin=rliabsibling_SS-1.96*rliabsibling_SE_SS, xmax=rliabsibling_SS+1.96*rliabsibling_SE_SS),
                   position = position_dodge(width = 0.01), alpha=.2, size=.2) +
      geom_errorbar(aes(ymin=rliabsibling_OS-1.96*rliabsibling_SE_OS, ymax=rliabsibling_OS+1.96*rliabsibling_SE_OS),
                    position = position_dodge(width = 0.01), alpha=.2, size=.2) + 
  geom_abline(slope = 1, intercept = 0) +
  xlab('Sibling Same Sex Correlation') + ylab('Sibling Opposite Sex Correlation')  +
  geom_smooth(method = "lm", se = TRUE)  +
    theme(axis.title.x = element_text(size=size_text), 
          axis.title.y = element_text(size=size_text),
        axis.text.y = element_text(size=size_text),
        axis.text.x = element_text(size=size_text)
) 



correlation_sibOS_sibSS
## Warning: position_dodge requires non-overlapping x intervals

ggsave(correlation_sibOS_sibSS, file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/supp_figure_2.jpg',dpi='print' , device = 'jpeg', width = 3,height= 2)
## Warning: position_dodge requires non-overlapping x intervals

Supplementary Figure 3

allphewas_sibling_correlation_dff <- allphewas_sibling_correlation %>%
  mutate(rsib_diff=rliabsibling_SS-rliabsibling_OS)

diff_sib_corOS_corSS <- ggplot(allphewas_sibling_correlation_dff, aes(rsib_diff)) +
  geom_histogram(bins = 60) + xlab("Same Sex Correlation - Opposite Sex Correlation") +
      theme(axis.title.x = element_text(size=size_text), 
          axis.title.y = element_text(size=size_text),
        axis.text.y = element_text(size=size_text),
        axis.text.x = element_text(size=size_text)
) 


diff_sib_corOS_corSS

ggsave(diff_sib_corOS_corSS, file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/supp_figure_3.jpg',dpi='print', 
       device = 'jpeg', width = 3,height= 2)

Supplementary Figure 4

size_text <- 4

correlation_twinOS_sibOS <- ggplot(allphewas_all_correlation_sibling_twin , aes(rliabos,rliabsibling_OS)) + geom_point(size=.03) + 
    geom_errorbarh(aes(xmin=rliabos-1.96*rliabos_SE, xmax=rliabos+1.96*rliabos_SE),
                   position = position_dodge(width = 0.01), alpha=.2, size=.2) +
      geom_errorbar(aes(ymin=rliabsibling_OS-1.96*rliabsibling_SE_OS, ymax=rliabsibling_OS+1.96*rliabsibling_SE_OS),
                    position = position_dodge(width = 0.01), alpha=.2, size=.2) + geom_abline(slope = 1, intercept = 0) + 
  xlab('Twin Opposite Sex Correlation') + 
  ylab('Sibling Opposite Sex Correlation') +
        theme(axis.title.x = element_text(size=size_text), 
          axis.title.y = element_text(size=size_text),
        axis.text.y = element_text(size=size_text),
        axis.text.x = element_text(size=size_text)
) 





correlation_twinOS_sibSS <- ggplot(allphewas_all_correlation_sibling_twin , aes(rliabos,rliabsibling_SS)) + geom_point(size=.03) + 
    geom_errorbarh(aes(xmin=rliabos-1.96*rliabos_SE, xmax=rliabos+1.96*rliabos_SE),
                   position = position_dodge(width = 0.01), alpha=.2) +
      geom_errorbar(aes(ymin=rliabsibling_SS-1.96*rliabsibling_SE_SS, ymax=rliabsibling_SS+1.96*rliabsibling_SE_SS),
                    position = position_dodge(width = 0.01), alpha=.2) + geom_abline(slope = 1, intercept = 0) + 
  xlab('Twin Opposite Sex Correlation') + 
  ylab('Sibling Same Sex Correlation') +
        theme(axis.title.x = element_text(size=size_text), 
          axis.title.y = element_text(size=size_text),
        axis.text.y = element_text(size=size_text),
        axis.text.x = element_text(size=size_text)
) 



correlation_twinOS_sibSS_sibOS <- ggarrange(correlation_twinOS_sibOS,correlation_twinOS_sibSS,labels = c( 'a)','b)'),  
                      ncol = 2,
                      font.label = list(size = size_text))
## Warning: position_dodge requires non-overlapping x intervals

## Warning: position_dodge requires non-overlapping x intervals
ggsave(correlation_twinOS_sibSS_sibOS, file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/supp_figure_4.jpg',dpi='print', 
       device = 'jpeg', width = 3,height= 2)

correlation_twinOS_sibSS_sibOS

Supplementary Figure 5

size_text <- 4

barchart_fn_domain_environment <- ggplot(z_meta_all, aes(functional_domain, statistic, fill=stat)) + 
  geom_bar(stat="identity", position="dodge", width = 0.75, colour="black", size=.2) +
  theme(legend.position=c(.5,.75),legend.direction = "horizontal") +     
  theme(axis.text=element_text(size=size_text))  +
      theme(axis.text.x = element_text(angle = 30, size=size_text, hjust = .75), 
        axis.text.y = element_text(size=size_text),
        axis.title.x = element_text(size=size_text,vjust=4),
        axis.title.y = element_text(size=size_text,vjust=-1)) +
  theme(legend.text=element_text(size=size_text), legend.title = element_text(size=size_text)) +
  theme(legend.key.size = unit(.2, "cm")) 


barchart_fn_domain_environment


ggsave(barchart_fn_domain_environment, file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/supp_figure_5.jpg',dpi='print', 
       device = 'jpeg', width = 3,height= 2)

Supplementary Figure 7

size_text <- 4



twinPairsYOB <- read_delim("~/Dropbox (HMS)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/manuscript_data/twinPairsYOB.csv", 
    "\t", escape_double = FALSE, col_types = cols(phewas_code = col_character()), 
    trim_ws = TRUE)


twinPairsYOB <- twinPairsYOB %>% inner_join(.,quant_pheno_description, by='phewas_code')


twinPairsYOB <- twinPairsYOB %>% mutate(Description = ifelse(phewas_code == 'COST','Binary Phenotypes', Description))

ggplot_ageDistribution <- ggplot(twinPairsYOB, aes(YOB, fill = Description, colour = Description)) + 
  geom_density(alpha = 0.1) + facet_wrap(~Description) +
  theme(axis.text.x=element_text(angle=45, size = 8),
        axis.title.x = element_text(vjust=2)) +
ylab('Density') + xlab('Year of Birth') +
theme(legend.position="none") +
          theme(axis.title.x = element_text(size=size_text), 
          axis.title.y = element_text(size=size_text),
        axis.text.y = element_text(size=size_text),
        axis.text.x = element_text(size=size_text),
      strip.text = element_text(size=size_text)
) 


  


ggplot_ageDistribution

ggsave(ggplot_ageDistribution, file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/supp_figure_7.jpg',dpi='print', 
       device = 'jpeg', width = 3,height= 2)

Supplementary Figure 8

coefficients_age_gender <- allphewas_both_functional_domain %>% dplyr::select(phewas_code,functional_domain,beta_gender=`as.factor(Gender)M`, beta_age=MemberBirthYear) %>%
  filter(!is.na(functional_domain))



coefficients_age_gender$functional_domain <- factor(coefficients_age_gender$functional_domain, 
                                                 levels = c('All Traits',
                                                            'Aging',
                                                            'Cardiovascular',
                                                            'Cognitive',
                                                            'Connective tissue',
                                                            'Dermatological',
                                                            'Developmental',
                                                            'Ear,Nose,Throat',
                                                            'Endocrine',
                                                            'Environment',
                                                            'Gastrointestinal',
                                                            'Hematological',
                                                            'Immunological',
                                                            'Infection',
                                                            'Metabolic',
                                                            'Neoplasms',
                                                            'Neurological',
                                                            'Ophthalmological',
                                                            'Psychiatric',
                                                            'Reproduction',
                                                            'Respiratory',
                                                            'Skeletal'))


coefficients_age_gender <- coefficients_age_gender %>% filter(!is.na(functional_domain))


plot_coefficient_gender <- ggplot(coefficients_age_gender, aes(x=functional_domain, y=beta_gender)) +
  geom_boxplot(aes(colour=functional_domain), outlier.alpha = 0.4,size=.2,outlier.size = .3) + 
   scale_y_log10() +
 ylab('Effect Size - Gender') + xlab('Functional Domain') +
  theme(legend.position=c(.3,.95),legend.direction = "horizontal") +     theme(axis.text=element_text(size=size_text))  +
      theme(axis.text.x = element_text(angle = 30, size=size_text, hjust = .75), 
        axis.text.y = element_text(size=size_text),
        axis.title.x = element_text(size=size_text,vjust=4),
        axis.title.y = element_text(size=size_text,vjust=-1)) +
  theme(legend.text=element_text(size=size_text-2), legend.title = element_text(size=size_text-2)) +
  theme(legend.key.size = unit(.1, "cm")) 

ggsave(plot_coefficient_gender, file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/supp_figure_8.jpg',dpi='print',
       device = 'jpeg', width = 3,height= 2)
## Warning in self$trans$transform(x): NaNs produced
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 624 rows containing non-finite values (stat_boxplot).
plot_coefficient_gender
## Warning in self$trans$transform(x): NaNs produced
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 624 rows containing non-finite values (stat_boxplot).

Supplementary Figure 9

plot_coefficient_age <- ggplot(coefficients_age_gender, aes(x=functional_domain, y=beta_age)) +
  geom_boxplot(aes(colour=functional_domain), outlier.alpha = 0.4,size=.2,outlier.size = .3) + 
   scale_y_log10() +
 ylab('Effect Size - Age') + xlab('Functional Domain') +
  theme(legend.position=c(.3,.95),legend.direction = "horizontal") +     
  theme(axis.text=element_text(size=size_text))  +
      theme(axis.text.x = element_text(angle = 30, size=size_text, hjust = .75), 
        axis.text.y = element_text(size=size_text),
        axis.title.x = element_text(size=size_text,vjust=4),
        axis.title.y = element_text(size=size_text,vjust=-1)) +
  theme(legend.text=element_text(size=size_text-2), legend.title = element_text(size=size_text-2)) +
  theme(legend.key.size = unit(.1, "cm")) 

ggsave(plot_coefficient_age, file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/supp_figure_9.jpg',dpi='print',
       device = 'jpeg', width = 3,height= 2)
## Warning in self$trans$transform(x): NaNs produced
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 508 rows containing non-finite values (stat_boxplot).
plot_coefficient_age
## Warning in self$trans$transform(x): NaNs produced
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 508 rows containing non-finite values (stat_boxplot).